# HG changeset patch
# User coffeys
# Date 1408477288 -3600
# Node ID 03c5d509a8116f24df4b7802fc363a6a623ee7e7
# Parent  f52cb91647590fe4a12af295a8a87e2cb761b044
# Parent  f06c7b654d63c3c1d38f91ef43d74dc6494d1b04
Merge

diff -r f06c7b654d63 -r 03c5d509a811 .hgtags
--- a/.hgtags	Thu Jul 31 09:58:53 2014 +0100
+++ b/.hgtags	Tue Aug 19 20:41:28 2014 +0100
@@ -501,3 +501,10 @@
 00cf2b6f51b9560b01030e8f4c28c466f0b21fe3 hs25.20-b23
 19408d5fd31c25ce60c43dd33e92b96e8df4a4ea jdk8u20-b25
 eaa4074a7e3975cd33ec55e6b584586e2ac681bd jdk8u20-b26
+4828415ebbf11e205dcc08e97ad5ae7dd03522f9 jdk8u40-b00
+d952af8cf67dd1e7ab5fec9a299c6c6dafd1863e hs25.40-b01
+f0afba33c928ddaa2d5f003b90d683c143f78ea3 hs25.40-b02
+e2976043eac37c8036f6a6dfa454787f64fa3f56 hs25.40-b03
+cb95655ef06fece507bbc2792474411ab2e899ab hs25.40-b04
+dc06b830ea95ed953cac02e9e67a75ab682edb97 jdk8u40-b01
+897333c7e5874625bd26d09fdaf242196024e9c2 hs25.40-b05
diff -r f06c7b654d63 -r 03c5d509a811 agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegion.java
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegion.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegion.java	Tue Aug 19 20:41:28 2014 +0100
@@ -24,23 +24,26 @@
 
 package sun.jvm.hotspot.gc_implementation.g1;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Observable;
 import java.util.Observer;
-
 import sun.jvm.hotspot.debugger.Address;
-import sun.jvm.hotspot.memory.ContiguousSpace;
+import sun.jvm.hotspot.memory.CompactibleSpace;
+import sun.jvm.hotspot.memory.MemRegion;
 import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.types.AddressField;
 import sun.jvm.hotspot.types.CIntegerField;
 import sun.jvm.hotspot.types.Type;
 import sun.jvm.hotspot.types.TypeDataBase;
 
 // Mirror class for HeapRegion. Currently we don't actually include
-// any of its fields but only iterate over it (which we get "for free"
-// as HeapRegion ultimately inherits from ContiguousSpace).
+// any of its instance fields except _top, and otherwise only iterate over it.
 
-public class HeapRegion extends ContiguousSpace {
+public class HeapRegion extends CompactibleSpace {
     // static int GrainBytes;
     static private CIntegerField grainBytesField;
+    static private AddressField topField;
 
     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -54,6 +57,8 @@
         Type type = db.lookupType("HeapRegion");
 
         grainBytesField = type.getCIntegerField("GrainBytes");
+        topField = type.getAddressField("_top");
+
     }
 
     static public long grainBytes() {
@@ -63,4 +68,25 @@
     public HeapRegion(Address addr) {
         super(addr);
     }
+
+    public Address top() { // accessor for the mirrored HeapRegion's "_top" address field
+        return topField.getValue(addr); // topField is resolved from the "HeapRegion" type during initialization
+    }
+
+    @Override
+    public List getLiveRegions() { // always returns a one-element list
+        List res = new ArrayList(); // raw List, matching the raw return type declared here
+        res.add(new MemRegion(bottom(), top())); // the single region is built from bottom() and top()
+        return res;
+    }
+
+    @Override
+    public long used() { // computed from this class's own top() accessor
+        return top().minus(bottom()); // address difference top() - bottom()
+    }
+
+    @Override
+    public long free() { // computed from this class's own top() accessor
+        return end().minus(top()); // address difference end() - top()
+    }
 }
diff -r f06c7b654d63 -r 03c5d509a811 make/excludeSrc.make
--- a/make/excludeSrc.make	Thu Jul 31 09:58:53 2014 +0100
+++ b/make/excludeSrc.make	Tue Aug 19 20:41:28 2014 +0100
@@ -77,30 +77,40 @@
       CXXFLAGS += -DINCLUDE_ALL_GCS=0
       CFLAGS += -DINCLUDE_ALL_GCS=0
 
-      Src_Files_EXCLUDE += \
-	cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \
-	cmsGCAdaptivePolicyCounters.cpp cmsLockVerifier.cpp compactibleFreeListSpace.cpp \
-	concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp \
-	freeChunk.cpp adaptiveFreeList.cpp promotionInfo.cpp vmCMSOperations.cpp \
-	collectionSetChooser.cpp concurrentG1Refine.cpp concurrentG1RefineThread.cpp \
-	concurrentMark.cpp concurrentMarkThread.cpp dirtyCardQueue.cpp g1AllocRegion.cpp \
-	g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \
-	g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \
-	g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp g1OopClosures.cpp \
-	g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1StringDedup.cpp g1StringDedupStat.cpp \
-	g1StringDedupTable.cpp g1StringDedupThread.cpp g1StringDedupQueue.cpp g1_globals.cpp heapRegion.cpp \
-	g1BiasedArray.cpp heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \
-	ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp g1CodeCacheRemSet.cpp \
-	adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \
-	cardTableExtension.cpp gcTaskManager.cpp gcTaskThread.cpp objectStartArray.cpp \
-	parallelScavengeHeap.cpp parMarkBitMap.cpp pcTasks.cpp psAdaptiveSizePolicy.cpp \
-	psCompactionManager.cpp psGCAdaptivePolicyCounters.cpp psGenerationCounters.cpp \
-	psMarkSweep.cpp psMarkSweepDecorator.cpp psMemoryPool.cpp psOldGen.cpp \
-	psParallelCompact.cpp psPromotionLAB.cpp psPromotionManager.cpp psScavenge.cpp \
-	psTasks.cpp psVirtualspace.cpp psYoungGen.cpp vmPSOperations.cpp asParNewGeneration.cpp \
-	parCardTableModRefBS.cpp parGCAllocBuffer.cpp parNewGeneration.cpp mutableSpace.cpp \
-	gSpaceCounters.cpp allocationStats.cpp spaceCounters.cpp gcAdaptivePolicyCounters.cpp \
-	mutableNUMASpace.cpp immutableSpace.cpp yieldingWorkGroup.cpp hSpaceCounters.cpp
+      gc_impl := $(GAMMADIR)/src/share/vm/gc_implementation
+      gc_exclude :=							\
+	$(notdir $(wildcard $(gc_impl)/concurrentMarkSweep/*.cpp))	\
+	$(notdir $(wildcard $(gc_impl)/g1/*.cpp))			\
+	$(notdir $(wildcard $(gc_impl)/parallelScavenge/*.cpp))		\
+	$(notdir $(wildcard $(gc_impl)/parNew/*.cpp))
+      Src_Files_EXCLUDE += $(gc_exclude)
+
+      # Exclude everything in $(gc_impl)/shared except the files listed
+      # in $(gc_shared_keep).
+      gc_shared_all := $(notdir $(wildcard $(gc_impl)/shared/*.cpp))
+      gc_shared_keep :=							\
+	adaptiveSizePolicy.cpp						\
+	ageTable.cpp							\
+	collectorCounters.cpp						\
+	cSpaceCounters.cpp						\
+	gcPolicyCounters.cpp						\
+	gcStats.cpp							\
+	gcTimer.cpp							\
+	gcTrace.cpp							\
+	gcTraceSend.cpp							\
+	gcTraceTime.cpp							\
+	gcUtil.cpp							\
+	generationCounters.cpp						\
+	markSweep.cpp							\
+	objectCountEventSender.cpp					\
+	spaceDecorator.cpp						\
+	vmGCOperations.cpp
+      Src_Files_EXCLUDE += $(filter-out $(gc_shared_keep),$(gc_shared_all))
+
+      # src/share/vm/services
+      Src_Files_EXCLUDE +=						\
+	g1MemoryPool.cpp						\
+	psMemoryPool.cpp
 endif
 
 ifeq ($(INCLUDE_NMT), false)
diff -r f06c7b654d63 -r 03c5d509a811 make/hotspot_version
--- a/make/hotspot_version	Thu Jul 31 09:58:53 2014 +0100
+++ b/make/hotspot_version	Tue Aug 19 20:41:28 2014 +0100
@@ -34,8 +34,8 @@
 HOTSPOT_VM_COPYRIGHT=Copyright 2014
 
 HS_MAJOR_VER=25
-HS_MINOR_VER=20
-HS_BUILD_NUMBER=23
+HS_MINOR_VER=40
+HS_BUILD_NUMBER=05
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/ppc/vm/compiledIC_ppc.cpp
--- a/src/cpu/ppc/vm/compiledIC_ppc.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/ppc/vm/compiledIC_ppc.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -50,34 +50,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 // A PPC CompiledStaticCall looks like this:
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/sparc/vm/assembler_sparc.hpp
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -123,6 +123,7 @@
     fpop2_op3    = 0x35,
     impdep1_op3  = 0x36,
     aes3_op3     = 0x36,
+    sha_op3      = 0x36,
     alignaddr_op3  = 0x36,
     faligndata_op3 = 0x36,
     flog3_op3    = 0x36,
@@ -223,7 +224,11 @@
     mwtos_opf          = 0x119,
 
     aes_kexpand0_opf   = 0x130,
-    aes_kexpand2_opf   = 0x131
+    aes_kexpand2_opf   = 0x131,
+
+    sha1_opf           = 0x141,
+    sha256_opf         = 0x142,
+    sha512_opf         = 0x143
   };
 
   enum op5s {
@@ -595,6 +600,11 @@
   // AES crypto instructions supported only on certain processors
   static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
 
+  // SHA crypto instructions supported only on certain processors
+  static void sha1_only()   { assert( VM_Version::has_sha1(),   "This instruction only works on SPARC with SHA1"); }
+  static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); }
+  static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); }
+
   // instruction only in VIS1
   static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
 
@@ -1179,7 +1189,6 @@
                                                u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
   inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
 
-
   //  VIS1 instructions
 
   void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); }
@@ -1203,6 +1212,12 @@
   void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
   void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
 
+  // Crypto SHA instructions
+  // (each is emitted as op | op3 | opf only; no register fields are encoded here)
+  void sha1()   { sha1_only();    emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
+  void sha256() { sha256_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); }
+  void sha512() { sha512_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); }
+
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef CHECK_DELAY
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/sparc/vm/compiledIC_sparc.cpp
--- a/src/cpu/sparc/vm/compiledIC_sparc.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/sparc/vm/compiledIC_sparc.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -50,34 +50,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/sparc/vm/sparc.ad
--- a/src/cpu/sparc/vm/sparc.ad	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/sparc/vm/sparc.ad	Tue Aug 19 20:41:28 2014 +0100
@@ -6184,7 +6184,11 @@
   ins_cost(DEFAULT_COST * 3/2);
   format %{ "SET    $con,$dst\t! non-oop ptr" %}
   ins_encode %{
-    __ set($con$$constant, $dst$$Register);
+    if (_opnds[1]->constant_reloc() == relocInfo::metadata_type) {
+      __ set_metadata_constant((Metadata*)$con$$constant, $dst$$Register);
+    } else {
+      __ set($con$$constant, $dst$$Register);
+    }
   %}
   ins_pipe(loadConP);
 %}
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/sparc/vm/stubGenerator_sparc.cpp
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -4575,6 +4575,219 @@
     return start;
   }
 
+  address generate_sha1_implCompress(bool multi_block, const char *name) { // multi_block selects the looping variant; name labels the stub
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc(); // entry address of the generated stub; this is the return value
+
+    Label L_sha1_loop, L_sha1_unaligned_input, L_sha1_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // int[]  SHA.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load the five 32-bit state words into F0-F4
+    for (i = 0; i < 5; i++) {
+      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
+    }
+
+    __ andcc(buf, 7, G0); // test the low three bits of buf (is it 8-byte aligned?)
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha1_unaligned_input); // not aligned: take the alignaddr/faligndata path
+    __ delayed()->nop();
+
+    __ BIND(L_sha1_loop);
+    // aligned path: load one 64-byte block of buf as eight doubles into F8-F22
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    __ sha1(); // no operands are encoded; presumably works on the F registers loaded above - confirm against the ISA manual
+    if (multi_block) { // advance ofs and buf by one 64-byte block, loop while ofs <= limit
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F4 into state and return (F0-F3 in the loop, F4 in the retl delay slot)
+    for (i = 0; i < 4; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
+
+    __ BIND(L_sha1_unaligned_input);
+    __ alignaddr(buf, G0, buf); // presumably rounds buf down to an 8-byte boundary and records the offset for faligndata - confirm
+
+    __ BIND(L_sha1_unaligned_input_loop);
+    // unaligned path: load nine doubles (72 bytes starting at buf) into F8-F24
+    for (i = 0; i < 9; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    for (i = 0; i < 8; i++) { // combine each adjacent register pair; results land in F8-F22
+      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
+    }
+    __ sha1();
+    if (multi_block) { // same block-advance loop as the aligned path
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F4 into state and return (F0-F3 in the loop, F4 in the retl delay slot)
+    for (i = 0; i < 4; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
+
+    return start;
+  }
+
+  address generate_sha256_implCompress(bool multi_block, const char *name) { // multi_block selects the looping variant; name labels the stub
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc(); // entry address of the generated stub; this is the return value
+
+    Label L_sha256_loop, L_sha256_unaligned_input, L_sha256_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // int[]  SHA2.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load the eight 32-bit state words into F0-F7
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
+    }
+
+    __ andcc(buf, 7, G0); // test the low three bits of buf (is it 8-byte aligned?)
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha256_unaligned_input); // not aligned: take the alignaddr/faligndata path
+    __ delayed()->nop();
+
+    __ BIND(L_sha256_loop);
+    // aligned path: load one 64-byte block of buf as eight doubles into F8-F22
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    __ sha256(); // no operands are encoded; presumably works on the F registers loaded above - confirm against the ISA manual
+    if (multi_block) { // advance ofs and buf by one 64-byte block, loop while ofs <= limit
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F7 into state and return (F0-F6 in the loop, F7 in the retl delay slot)
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
+
+    __ BIND(L_sha256_unaligned_input);
+    __ alignaddr(buf, G0, buf); // presumably rounds buf down to an 8-byte boundary and records the offset for faligndata - confirm
+
+    __ BIND(L_sha256_unaligned_input_loop);
+    // unaligned path: load nine doubles (72 bytes starting at buf) into F8-F24
+    for (i = 0; i < 9; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    for (i = 0; i < 8; i++) { // combine each adjacent register pair; results land in F8-F22
+      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
+    }
+    __ sha256();
+    if (multi_block) { // same block-advance loop as the aligned path
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F7 into state and return (F0-F6 in the loop, F7 in the retl delay slot)
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
+
+    return start;
+  }
+
+  address generate_sha512_implCompress(bool multi_block, const char *name) { // multi_block selects the looping variant; name labels the stub
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc(); // entry address of the generated stub; this is the return value
+
+    Label L_sha512_loop, L_sha512_unaligned_input, L_sha512_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // long[] SHA5.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load the eight 64-bit state words into the double registers F0-F14
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, state, i*8, as_FloatRegister(i*2));
+    }
+
+    __ andcc(buf, 7, G0); // test the low three bits of buf (is it 8-byte aligned?)
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha512_unaligned_input); // not aligned: take the alignaddr/faligndata path
+    __ delayed()->nop();
+
+    __ BIND(L_sha512_loop);
+    // aligned path: load one 128-byte block of buf as sixteen doubles into F16-F46
+    for (i = 0; i < 16; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
+    }
+    __ sha512(); // no operands are encoded; presumably works on the F registers loaded above - confirm against the ISA manual
+    if (multi_block) { // advance ofs and buf by one 128-byte block, loop while ofs <= limit
+      __ add(ofs, 128, ofs);
+      __ add(buf, 128, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F14 into state and return (F0-F12 in the loop, F14 in the retl delay slot)
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
+
+    __ BIND(L_sha512_unaligned_input);
+    __ alignaddr(buf, G0, buf); // presumably rounds buf down to an 8-byte boundary and records the offset for faligndata - confirm
+
+    __ BIND(L_sha512_unaligned_input_loop);
+    // unaligned path: load seventeen doubles (136 bytes starting at buf) into F16-F48
+    for (i = 0; i < 17; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
+    }
+    for (i = 0; i < 16; i++) { // combine each adjacent register pair; results land in F16-F46
+      __ faligndata(as_FloatRegister(i*2 + 16), as_FloatRegister(i*2 + 18), as_FloatRegister(i*2 + 16));
+    }
+    __ sha512();
+    if (multi_block) { // same block-advance loop as the aligned path
+      __ add(ofs, 128, ofs);
+      __ add(buf, 128, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F14 into state and return (F0-F12 in the loop, F14 in the retl delay slot)
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
+
+    return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -4647,6 +4860,20 @@
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
+
+    // generate SHA1/SHA256/SHA512 intrinsics code
+    if (UseSHA1Intrinsics) {
+      StubRoutines::_sha1_implCompress     = generate_sha1_implCompress(false,   "sha1_implCompress");
+      StubRoutines::_sha1_implCompressMB   = generate_sha1_implCompress(true,    "sha1_implCompressMB");
+    }
+    if (UseSHA256Intrinsics) {
+      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
+    }
+    if (UseSHA512Intrinsics) {
+      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
+      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true,  "sha512_implCompressMB");
+    }
   }
 
 
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/sparc/vm/stubRoutines_sparc.hpp
--- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -41,7 +41,7 @@
 enum /* platform_dependent_constants */ {
   // %%%%%%%% May be able to shrink this a lot
   code_size1 = 20000,           // simply increase if too small (assembler will crash if too small)
-  code_size2 = 22000            // simply increase if too small (assembler will crash if too small)
+  code_size2 = 23000            // simply increase if too small (assembler will crash if too small)
 };
 
 class Sparc {
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/sparc/vm/vm_version_sparc.cpp
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -234,7 +234,7 @@
   assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
 
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
                (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", vis1" : ""),
@@ -243,6 +243,9 @@
                (has_blk_init() ? ", blk_init" : ""),
                (has_cbcond() ? ", cbcond" : ""),
                (has_aes() ? ", aes" : ""),
+               (has_sha1() ? ", sha1" : ""),
+               (has_sha256() ? ", sha256" : ""),
+               (has_sha512() ? ", sha512" : ""),
                (is_ultra3() ? ", ultra3" : ""),
                (is_sun4v() ? ", sun4v" : ""),
                (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
@@ -301,6 +304,58 @@
     }
   }
 
+  // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
+  if (has_sha1() || has_sha256() || has_sha512()) {
+    if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
+      if (FLAG_IS_DEFAULT(UseSHA)) {
+        FLAG_SET_DEFAULT(UseSHA, true);
+      }
+    } else {
+      if (UseSHA) {
+        warning("SPARC SHA intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
+        FLAG_SET_DEFAULT(UseSHA, false);
+      }
+    }
+  } else if (UseSHA) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
+  if (!UseSHA) {
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  } else {
+    if (has_sha1()) {
+      if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+      }
+    } else if (UseSHA1Intrinsics) {
+      warning("SHA1 instruction is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    }
+    if (has_sha256()) {
+      if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+      }
+    } else if (UseSHA256Intrinsics) {
+      warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    }
+
+    if (has_sha512()) {
+      if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+      }
+    } else if (UseSHA512Intrinsics) {
+      warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+    }
+    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA, false);
+    }
+  }
+
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/sparc/vm/vm_version_sparc.hpp
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,10 @@
     T_family             = 16,
     T1_model             = 17,
     sparc5_instructions  = 18,
-    aes_instructions     = 19
+    aes_instructions     = 19,
+    sha1_instruction     = 20,
+    sha256_instruction   = 21,
+    sha512_instruction   = 22
   };
 
   enum Feature_Flag_Set {
@@ -77,6 +80,9 @@
     T1_model_m              = 1 << T1_model,
     sparc5_instructions_m   = 1 << sparc5_instructions,
     aes_instructions_m      = 1 << aes_instructions,
+    sha1_instruction_m      = 1 << sha1_instruction,
+    sha256_instruction_m    = 1 << sha256_instruction,
+    sha512_instruction_m    = 1 << sha512_instruction,
 
     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -129,6 +135,9 @@
   static bool has_cbcond()              { return (_features & cbcond_instructions_m) != 0; }
   static bool has_sparc5_instr()        { return (_features & sparc5_instructions_m) != 0; }
   static bool has_aes()                 { return (_features & aes_instructions_m) != 0; }
+  static bool has_sha1()                { return (_features & sha1_instruction_m) != 0; }
+  static bool has_sha256()              { return (_features & sha256_instruction_m) != 0; }
+  static bool has_sha512()              { return (_features & sha512_instruction_m) != 0; }
 
   static bool supports_compare_and_exchange()
                                         { return has_v9(); }
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/x86/vm/assembler_x86.cpp
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -3854,6 +3854,15 @@
 }
 
 // Carry-Less Multiplication Quadword
+void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { // two-operand form; unlike vpclmulqdq it does not assert AVX support
+  assert(VM_Version::supports_clmul(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); // dst is passed twice: presumably it is both first source and destination
+  emit_int8(0x44); // opcode byte following the prefix emitted above
+  emit_int8((unsigned char)(0xC0 | encode)); // 0xC0 sets both top bits: register-to-register ModRM form
+  emit_int8((unsigned char)mask); // low 8 bits of mask are emitted as the final byte
+}
+
+// Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
   bool vector256 = false;
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/x86/vm/assembler_x86.hpp
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1837,6 +1837,7 @@
   void vpbroadcastd(XMMRegister dst, XMMRegister src);
 
   // Carry-Less Multiplication Quadword
+  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
 
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/x86/vm/compiledIC_x86.cpp
--- a/src/cpu/x86/vm/compiledIC_x86.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/x86/vm/compiledIC_x86.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -47,34 +47,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/x86/vm/globals_x86.hpp
--- a/src/cpu/x86/vm/globals_x86.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/x86/vm/globals_x86.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -130,16 +130,16 @@
           "Use fast-string operation for zeroing: rep stosb")               \
                                                                             \
   /* Use Restricted Transactional Memory for lock eliding */                \
-  experimental(bool, UseRTMLocking, false,                                  \
+  product(bool, UseRTMLocking, false,                                       \
           "Enable RTM lock eliding for inflated locks in compiled code")    \
                                                                             \
   experimental(bool, UseRTMForStackLocks, false,                            \
           "Enable RTM lock eliding for stack locks in compiled code")       \
                                                                             \
-  experimental(bool, UseRTMDeopt, false,                                    \
+  product(bool, UseRTMDeopt, false,                                         \
           "Perform deopt and recompilation based on RTM abort ratio")       \
                                                                             \
-  experimental(uintx, RTMRetryCount, 5,                                     \
+  product(uintx, RTMRetryCount, 5,                                          \
           "Number of RTM retries on lock abort or busy")                    \
                                                                             \
   experimental(intx, RTMSpinLoopCount, 100,                                 \
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/x86/vm/macroAssembler_x86.cpp
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -7316,17 +7316,34 @@
  * Fold 128-bit data chunk
  */
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
-  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
-  vpclmulldq(xcrc, xK, xcrc); // [63:0]
-  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc); // [127:64]
+    vpclmulldq(xcrc, xK, xcrc); // [63:0]
+    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);    // pclmul{h,l}dq write into dst, so keep a copy of xcrc
+    pclmulhdq(xtmp, xK);   // [127:64]
+    pclmulldq(xcrc, xK);   // [63:0]
+    pxor(xcrc, xtmp);
+    movdqu(xtmp, Address(buf, offset));
+    pxor(xcrc, xtmp);
+  }
 }
 
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
-  vpclmulhdq(xtmp, xK, xcrc);
-  vpclmulldq(xcrc, xK, xcrc);
-  pxor(xcrc, xbuf);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc); // [127:64]
+    vpclmulldq(xcrc, xK, xcrc); // [63:0]
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);  // pclmul{h,l}dq write into dst, so keep a copy of xcrc
+    pclmulhdq(xtmp, xK); // [127:64]
+    pclmulldq(xcrc, xK); // [63:0]
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  }
 }
 
 /**
@@ -7444,9 +7461,17 @@
   // Fold 128 bits in xmm1 down into 32 bits in crc register.
   BIND(L_fold_128b);
   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
-  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
-  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
-  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  if (UseAVX > 0) {
+    vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
+    vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+    vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  } else {
+    movdqa(xmm2, xmm0);
+    pclmulqdq(xmm2, xmm1, 0x1);
+    movdqa(xmm3, xmm0);
+    pand(xmm3, xmm2);
+    pclmulqdq(xmm0, xmm3, 0x1);
+  }
   psrldq(xmm1, 8);
   psrldq(xmm2, 4);
   pxor(xmm0, xmm1);
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/x86/vm/macroAssembler_x86.hpp
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -966,6 +966,16 @@
   void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
   void mulss(XMMRegister dst, AddressLiteral src);
 
+  // Carry-Less Multiplication Quadword
+  void pclmulldq(XMMRegister dst, XMMRegister src) {
+    // 0x00 - multiply lower 64 bits [0:63]
+    Assembler::pclmulqdq(dst, src, 0x00);
+  }
+  void pclmulhdq(XMMRegister dst, XMMRegister src) {
+    // 0x11 - multiply upper 64 bits [64:127]
+    Assembler::pclmulqdq(dst, src, 0x11);
+  }
+
   void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, AddressLiteral src);
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/x86/vm/vm_version_x86.cpp
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -568,7 +568,7 @@
     FLAG_SET_DEFAULT(UseCLMUL, false);
   }
 
-  if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) {
+  if (UseCLMUL && (UseSSE > 2)) {
     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
       UseCRC32Intrinsics = true;
     }
@@ -590,6 +590,17 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
+  if (UseSHA) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
+    warning("SHA intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  }
+
   // Adjust RTM (Restricted Transactional Memory) flags
   if (!supports_rtm() && UseRTMLocking) {
     // Can't continue because UseRTMLocking affects UseBiasedLocking flag
@@ -803,6 +814,21 @@
         }
       }
     }
+    if ((cpu_family() == 0x06) &&
+        ((extended_cpu_model() == 0x36) || // Centerton
+         (extended_cpu_model() == 0x37) || // Silvermont
+         (extended_cpu_model() == 0x4D))) {
+#ifdef COMPILER2
+      if (FLAG_IS_DEFAULT(OptoScheduling)) {
+        OptoScheduling = true;
+      }
+#endif
+      if (supports_sse4_2()) { // Silvermont
+        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
+    }
   }
 
   // Use count leading zeros count instruction if available.
@@ -890,23 +916,25 @@
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
-    if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
+  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
+    if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
 #ifdef _LP64
       AllocatePrefetchDistance = 384;
 #else
       AllocatePrefetchDistance = 320;
 #endif
     }
-    if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
+    if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
       AllocatePrefetchDistance = 192;
       AllocatePrefetchLines = 4;
+    }
 #ifdef COMPILER2
-      if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+    if (supports_sse4_2()) {
+      if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
       }
+    }
 #endif
-    }
   }
   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
 
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/zero/vm/compiledIC_zero.cpp
--- a/src/cpu/zero/vm/compiledIC_zero.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/zero/vm/compiledIC_zero.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -58,34 +58,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
diff -r f06c7b654d63 -r 03c5d509a811 src/cpu/zero/vm/cppInterpreter_zero.cpp
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -40,6 +40,7 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/synchronizer.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os/aix/vm/os_aix.cpp
--- a/src/os/aix/vm/os_aix.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/aix/vm/os_aix.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -55,6 +55,7 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os/aix/vm/os_aix.inline.hpp
--- a/src/os/aix/vm/os_aix.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/aix/vm/os_aix.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,12 +26,9 @@
 #ifndef OS_AIX_VM_OS_AIX_INLINE_HPP
 #define OS_AIX_VM_OS_AIX_INLINE_HPP
 
-#include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
-#ifdef TARGET_OS_ARCH_aix_ppc
-# include "atomic_aix_ppc.inline.hpp"
-# include "orderAccess_aix_ppc.inline.hpp"
-#endif
 
 // System includes
 
diff -r f06c7b654d63 -r 03c5d509a811 src/os/aix/vm/thread_aix.inline.hpp
--- a/src/os/aix/vm/thread_aix.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/aix/vm/thread_aix.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,15 +26,9 @@
 #ifndef OS_AIX_VM_THREAD_AIX_INLINE_HPP
 #define OS_AIX_VM_THREAD_AIX_INLINE_HPP
 
-#include "runtime/atomic.hpp"
-#include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/threadLocalStorage.hpp"
 
-#include "atomic_aix_ppc.inline.hpp"
-#include "orderAccess_aix_ppc.inline.hpp"
-#include "prefetch_aix_ppc.inline.hpp"
-
 // Contains inlined functions for class Thread and ThreadLocalStorage
 
 inline void ThreadLocalStorage::pd_invalidate_all() {} // nothing to do
diff -r f06c7b654d63 -r 03c5d509a811 src/os/bsd/dtrace/libjvm_db.c
--- a/src/os/bsd/dtrace/libjvm_db.c	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/bsd/dtrace/libjvm_db.c	Tue Aug 19 20:41:28 2014 +0100
@@ -260,6 +260,9 @@
   uint64_t base;
   int err;
 
+  /* Clear *vmp now in case we jump to fail: */
+  memset(vmp, 0, sizeof(VMStructEntry));
+
   err = ps_pglobal_lookup(J->P, LIBJVM_SO, "gHotSpotVMStructs", &sym_addr);
   CHECK_FAIL(err);
   err = read_pointer(J, sym_addr, &gHotSpotVMStructs);
diff -r f06c7b654d63 -r 03c5d509a811 src/os/bsd/vm/os_bsd.cpp
--- a/src/os/bsd/vm/os_bsd.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/bsd/vm/os_bsd.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -48,6 +48,7 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os/bsd/vm/os_bsd.inline.hpp
--- a/src/os/bsd/vm/os_bsd.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/bsd/vm/os_bsd.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,15 +26,9 @@
 #define OS_BSD_VM_OS_BSD_INLINE_HPP
 
 #include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 
-#ifdef TARGET_OS_ARCH_bsd_x86
-# include "orderAccess_bsd_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_zero
-# include "orderAccess_bsd_zero.inline.hpp"
-#endif
-
 // System includes
 
 #include <unistd.h>
diff -r f06c7b654d63 -r 03c5d509a811 src/os/bsd/vm/thread_bsd.inline.hpp
--- a/src/os/bsd/vm/thread_bsd.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/bsd/vm/thread_bsd.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,20 +29,8 @@
 #error "This file should only be included from thread.inline.hpp"
 #endif
 
-#include "runtime/atomic.hpp"
-#include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#ifdef TARGET_OS_ARCH_bsd_x86
-# include "atomic_bsd_x86.inline.hpp"
-# include "orderAccess_bsd_x86.inline.hpp"
-# include "prefetch_bsd_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_zero
-# include "atomic_bsd_zero.inline.hpp"
-# include "orderAccess_bsd_zero.inline.hpp"
-# include "prefetch_bsd_zero.inline.hpp"
-#endif
 
 // Contains inlined functions for class Thread and ThreadLocalStorage
 
diff -r f06c7b654d63 -r 03c5d509a811 src/os/linux/vm/os_linux.cpp
--- a/src/os/linux/vm/os_linux.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/linux/vm/os_linux.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -49,6 +49,7 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os/linux/vm/os_linux.inline.hpp
--- a/src/os/linux/vm/os_linux.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/linux/vm/os_linux.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,24 +26,9 @@
 #define OS_LINUX_VM_OS_LINUX_INLINE_HPP
 
 #include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 
-#ifdef TARGET_OS_ARCH_linux_x86
-# include "orderAccess_linux_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_sparc
-# include "orderAccess_linux_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_zero
-# include "orderAccess_linux_zero.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_arm
-# include "orderAccess_linux_arm.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_ppc
-# include "orderAccess_linux_ppc.inline.hpp"
-#endif
-
 // System includes
 
 #include <unistd.h>
diff -r f06c7b654d63 -r 03c5d509a811 src/os/linux/vm/thread_linux.inline.hpp
--- a/src/os/linux/vm/thread_linux.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/linux/vm/thread_linux.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,35 +29,8 @@
 #error "This file should only be included from thread.inline.hpp"
 #endif
 
-#include "runtime/atomic.hpp"
-#include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#ifdef TARGET_OS_ARCH_linux_x86
-# include "atomic_linux_x86.inline.hpp"
-# include "orderAccess_linux_x86.inline.hpp"
-# include "prefetch_linux_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_sparc
-# include "atomic_linux_sparc.inline.hpp"
-# include "orderAccess_linux_sparc.inline.hpp"
-# include "prefetch_linux_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_zero
-# include "atomic_linux_zero.inline.hpp"
-# include "orderAccess_linux_zero.inline.hpp"
-# include "prefetch_linux_zero.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_arm
-# include "atomic_linux_arm.inline.hpp"
-# include "orderAccess_linux_arm.inline.hpp"
-# include "prefetch_linux_arm.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_ppc
-# include "atomic_linux_ppc.inline.hpp"
-# include "orderAccess_linux_ppc.inline.hpp"
-# include "prefetch_linux_ppc.inline.hpp"
-#endif
 
 // Contains inlined functions for class Thread and ThreadLocalStorage
 
diff -r f06c7b654d63 -r 03c5d509a811 src/os/solaris/dtrace/libjvm_db.c
--- a/src/os/solaris/dtrace/libjvm_db.c	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/solaris/dtrace/libjvm_db.c	Tue Aug 19 20:41:28 2014 +0100
@@ -260,6 +260,9 @@
   uint64_t base;
   int err;
 
+  /* Clear *vmp now in case we jump to fail: */
+  memset(vmp, 0, sizeof(VMStructEntry));
+
   err = ps_pglobal_lookup(J->P, LIBJVM_SO, "gHotSpotVMStructs", &sym_addr);
   CHECK_FAIL(err);
   err = read_pointer(J, sym_addr, &gHotSpotVMStructs);
diff -r f06c7b654d63 -r 03c5d509a811 src/os/solaris/vm/os_solaris.cpp
--- a/src/os/solaris/vm/os_solaris.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/solaris/vm/os_solaris.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -48,6 +48,7 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os/solaris/vm/os_solaris.inline.hpp
--- a/src/os/solaris/vm/os_solaris.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/solaris/vm/os_solaris.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,15 +26,9 @@
 #define OS_SOLARIS_VM_OS_SOLARIS_INLINE_HPP
 
 #include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 
-#ifdef TARGET_OS_ARCH_solaris_x86
-# include "orderAccess_solaris_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_sparc
-# include "orderAccess_solaris_sparc.inline.hpp"
-#endif
-
 // System includes
 #include <sys/param.h>
 #include <dlfcn.h>
diff -r f06c7b654d63 -r 03c5d509a811 src/os/solaris/vm/thread_solaris.inline.hpp
--- a/src/os/solaris/vm/thread_solaris.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/solaris/vm/thread_solaris.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,20 +29,9 @@
 #error "This file should only be included from thread.inline.hpp"
 #endif
 
-#include "runtime/atomic.hpp"
-#include "runtime/prefetch.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#ifdef TARGET_OS_ARCH_solaris_x86
-# include "atomic_solaris_x86.inline.hpp"
-# include "orderAccess_solaris_x86.inline.hpp"
-# include "prefetch_solaris_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_sparc
-# include "atomic_solaris_sparc.inline.hpp"
-# include "orderAccess_solaris_sparc.inline.hpp"
-# include "prefetch_solaris_sparc.inline.hpp"
-#endif
 
 // Thread::current is "hot" it's called > 128K times in the 1st 500 msecs of
 // startup.
diff -r f06c7b654d63 -r 03c5d509a811 src/os/windows/vm/os_windows.cpp
--- a/src/os/windows/vm/os_windows.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/windows/vm/os_windows.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -51,6 +51,7 @@
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/perfMemory.hpp"
 #include "runtime/sharedRuntime.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os/windows/vm/os_windows.inline.hpp
--- a/src/os/windows/vm/os_windows.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/windows/vm/os_windows.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,12 +26,9 @@
 #define OS_WINDOWS_VM_OS_WINDOWS_INLINE_HPP
 
 #include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 
-#ifdef TARGET_OS_ARCH_windows_x86
-# include "orderAccess_windows_x86.inline.hpp"
-#endif
-
 inline const char* os::file_separator()                { return "\\"; }
 inline const char* os::line_separator()                { return "\r\n"; }
 inline const char* os::path_separator()                { return ";"; }
diff -r f06c7b654d63 -r 03c5d509a811 src/os/windows/vm/thread_windows.inline.hpp
--- a/src/os/windows/vm/thread_windows.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os/windows/vm/thread_windows.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,15 +29,8 @@
 #error "This file should only be included from thread.inline.hpp"
 #endif
 
-#include "runtime/atomic.hpp"
-#include "runtime/prefetch.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/threadLocalStorage.hpp"
-#ifdef TARGET_OS_ARCH_windows_x86
-# include "atomic_windows_x86.inline.hpp"
-# include "orderAccess_windows_x86.inline.hpp"
-# include "prefetch_windows_x86.inline.hpp"
-#endif
 
 // Contains inlined functions for class Thread and ThreadLocalStorage
 
diff -r f06c7b654d63 -r 03c5d509a811 src/os_cpu/aix_ppc/vm/atomic_aix_ppc.inline.hpp
--- a/src/os_cpu/aix_ppc/vm/atomic_aix_ppc.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os_cpu/aix_ppc/vm/atomic_aix_ppc.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,7 +26,6 @@
 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_INLINE_HPP
 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_INLINE_HPP
 
-#include "orderAccess_aix_ppc.inline.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/os.hpp"
 #include "vm_version_ppc.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp
--- a/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,7 +26,6 @@
 #ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP
 #define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP
 
-#include "orderAccess_linux_ppc.inline.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/os.hpp"
 #include "vm_version_ppc.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp
--- a/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -118,7 +118,7 @@
       *ret_sp = os::Linux::ucontext_get_sp(uc);
     }
     if (ret_fp) {
-      *ret_fp = os::Linux::ucontext_get_fp(uc);
+      *ret_fp = (intptr_t*)NULL;
     }
   } else {
     // construct empty ExtendedPC for return value checking
@@ -136,18 +136,15 @@
 
 frame os::fetch_frame_from_context(void* ucVoid) {
   intptr_t* sp;
-  intptr_t* fp;
-  ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp);
-  return frame(sp, fp, epc.pc());
+  ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, NULL);
+  return frame(sp, frame::unpatchable, epc.pc());
 }
 
 frame os::get_sender_for_C_frame(frame* fr) {
-  return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
+  return frame(fr->sender_sp(), frame::unpatchable, fr->sender_pc());
 }
 
 frame os::current_frame() {
-  fprintf(stderr, "current_frame()");
-
   intptr_t* sp = StubRoutines::Sparc::flush_callers_register_windows_func()();
   frame myframe(sp, frame::unpatchable,
                 CAST_FROM_FN_PTR(address, os::current_frame));
diff -r f06c7b654d63 -r 03c5d509a811 src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp
--- a/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os_cpu/linux_sparc/vm/vm_version_linux_sparc.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -55,7 +55,7 @@
 
   if (detect_niagara()) {
     NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on Niagara");)
-    features = niagara1_m;
+    features = niagara1_m | T_family_m;
   }
 
   return features;
diff -r f06c7b654d63 -r 03c5d509a811 src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -137,6 +137,21 @@
 #endif
     if (av & AV_SPARC_AES)       features |= aes_instructions_m;
 
+#ifndef AV_SPARC_SHA1
+#define AV_SPARC_SHA1   0x00400000  /* sha1 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA1)         features |= sha1_instruction_m;
+
+#ifndef AV_SPARC_SHA256
+#define AV_SPARC_SHA256 0x00800000  /* sha256 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA256)       features |= sha256_instruction_m;
+
+#ifndef AV_SPARC_SHA512
+#define AV_SPARC_SHA512 0x01000000  /* sha512 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA512)       features |= sha512_instruction_m;
+
   } else {
     // getisax(2) failed, use the old legacy code.
 #ifndef PRODUCT
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/c1/c1_LinearScan.cpp
--- a/src/share/vm/c1/c1_LinearScan.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1628,25 +1628,22 @@
   Interval* precolored_cpu_intervals, *not_precolored_cpu_intervals;
   Interval* precolored_fpu_intervals, *not_precolored_fpu_intervals;
 
-  create_unhandled_lists(&precolored_cpu_intervals, &not_precolored_cpu_intervals, is_precolored_cpu_interval, is_virtual_cpu_interval);
-  if (has_fpu_registers()) {
-    create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
-#ifdef ASSERT
-  } else {
-    // fpu register allocation is omitted because no virtual fpu registers are present
-    // just check this again...
-    create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
-    assert(not_precolored_fpu_intervals == Interval::end(), "missed an uncolored fpu interval");
-#endif
-  }
-
   // allocate cpu registers
+  create_unhandled_lists(&precolored_cpu_intervals, &not_precolored_cpu_intervals,
+                         is_precolored_cpu_interval, is_virtual_cpu_interval);
+
+  // allocate fpu registers
+  create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals,
+                         is_precolored_fpu_interval, is_virtual_fpu_interval);
+
+  // the fpu interval allocation cannot be moved down below with the fpu section as
+  // the cpu_lsw.walk() changes interval positions.
+
   LinearScanWalker cpu_lsw(this, precolored_cpu_intervals, not_precolored_cpu_intervals);
   cpu_lsw.walk();
   cpu_lsw.finish_allocation();
 
   if (has_fpu_registers()) {
-    // allocate fpu registers
     LinearScanWalker fpu_lsw(this, precolored_fpu_intervals, not_precolored_fpu_intervals);
     fpu_lsw.walk();
     fpu_lsw.finish_allocation();
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/c1/c1_Runtime1.cpp
--- a/src/share/vm/c1/c1_Runtime1.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1018,6 +1018,7 @@
               n_copy->set_data((intx) (load_klass()));
             } else {
               assert(mirror() != NULL, "klass not set");
+              // Don't need a G1 pre-barrier here since we assert above that data isn't an oop.
               n_copy->set_data(cast_from_oop<intx>(mirror()));
             }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/ci/ciEnv.cpp
--- a/src/share/vm/ci/ciEnv.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/ci/ciEnv.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -51,6 +51,7 @@
 #include "runtime/init.hpp"
 #include "runtime/reflection.hpp"
 #include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/macros.hpp"
 #ifdef COMPILER1
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/ci/ciEnv.hpp
--- a/src/share/vm/ci/ciEnv.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/ci/ciEnv.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -184,6 +184,10 @@
     }
   }
 
+  void ensure_metadata_alive(ciMetadata* m) {
+    _factory->ensure_metadata_alive(m);
+  }
+
   ciInstance* get_instance(oop o) {
     if (o == NULL) return NULL;
     return get_object(o)->as_instance();
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/ci/ciKlass.hpp
--- a/src/share/vm/ci/ciKlass.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/ci/ciKlass.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -43,6 +43,7 @@
   friend class ciMethod;
   friend class ciMethodData;
   friend class ciObjArrayKlass;
+  friend class ciReceiverTypeData;
 
 private:
   ciSymbol* _name;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/ci/ciMethodData.cpp
--- a/src/share/vm/ci/ciMethodData.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/ci/ciMethodData.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -170,6 +170,7 @@
     Klass* k = data->as_ReceiverTypeData()->receiver(row);
     if (k != NULL) {
       ciKlass* klass = CURRENT_ENV->get_klass(k);
+      CURRENT_ENV->ensure_metadata_alive(klass);
       set_receiver(row, klass);
     }
   }
@@ -191,6 +192,7 @@
 void ciSpeculativeTrapData::translate_from(const ProfileData* data) {
   Method* m = data->as_SpeculativeTrapData()->method();
   ciMethod* ci_m = CURRENT_ENV->get_method(m);
+  CURRENT_ENV->ensure_metadata_alive(ci_m);
   set_method(ci_m);
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/ci/ciMethodData.hpp
--- a/src/share/vm/ci/ciMethodData.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/ci/ciMethodData.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -70,6 +70,7 @@
     Klass* v = TypeEntries::valid_klass(k);
     if (v != NULL) {
       ciKlass* klass = CURRENT_ENV->get_klass(v);
+      CURRENT_ENV->ensure_metadata_alive(klass);
       return with_status(klass, k);
     }
     return with_status(NULL, k);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/ci/ciObjectFactory.cpp
--- a/src/share/vm/ci/ciObjectFactory.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -46,6 +46,9 @@
 #include "oops/oop.inline.hpp"
 #include "oops/oop.inline2.hpp"
 #include "runtime/fieldType.hpp"
+#if INCLUDE_ALL_GCS
+# include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
 
 // ciObjectFactory
 //
@@ -374,6 +377,37 @@
   return NULL;
 }
 
+// ------------------------------------------------------------------
+// ciObjectFactory::ensure_metadata_alive
+//
+// Ensure that the metadata wrapped by the ciMetadata is kept alive by GC.
+// This is primarily useful for metadata which is considered a weak root
+// by the GC but needs to be a strong root if reachable from a current compilation.
+//
+void ciObjectFactory::ensure_metadata_alive(ciMetadata* m) {
+  ASSERT_IN_VM; // We're handling raw oops here.
+
+#if INCLUDE_ALL_GCS
+  if (!UseG1GC) {
+    return;
+  }
+  Klass* metadata_owner_klass;
+  if (m->is_klass()) {
+    metadata_owner_klass = m->as_klass()->get_Klass();
+  } else if (m->is_method()) {
+    metadata_owner_klass = m->as_method()->get_Method()->constants()->pool_holder();
+  } else {
+    fatal("Not implemented for other types of metadata");
+  }
+
+  oop metadata_holder = metadata_owner_klass->klass_holder();
+  if (metadata_holder != NULL) {
+    G1SATBCardTableModRefBS::enqueue(metadata_holder);
+  }
+
+#endif
+}
+
 //------------------------------------------------------------------
 // ciObjectFactory::get_unloaded_method
 //
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/ci/ciObjectFactory.hpp
--- a/src/share/vm/ci/ciObjectFactory.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/ci/ciObjectFactory.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -75,6 +75,8 @@
   ciObject* create_new_object(oop o);
   ciMetadata* create_new_object(Metadata* o);
 
+  void ensure_metadata_alive(ciMetadata* m);
+
   static bool is_equal(NonPermObject* p, oop key) {
     return p->object()->get_oop() == key;
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/classLoaderData.cpp
--- a/src/share/vm/classfile/classLoaderData.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/classLoaderData.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -73,7 +73,11 @@
 
 ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous, Dependencies dependencies) :
   _class_loader(h_class_loader()),
-  _is_anonymous(is_anonymous), _keep_alive(is_anonymous), // initially
+  _is_anonymous(is_anonymous),
+  // An anonymous class loader data doesn't have anything to keep
+  // it from being unloaded during parsing of the anonymous class.
+  // The null-class-loader should always be kept alive.
+  _keep_alive(is_anonymous || h_class_loader.is_null()),
   _metaspace(NULL), _unloading(false), _klasses(NULL),
   _claimed(0), _jmethod_ids(NULL), _handles(NULL), _deallocate_list(NULL),
   _next(NULL), _dependencies(dependencies),
@@ -317,12 +321,45 @@
   }
 }
 
+#ifdef ASSERT
+class AllAliveClosure : public OopClosure {
+  BoolObjectClosure* _is_alive_closure;
+  bool _found_dead;
+ public:
+  AllAliveClosure(BoolObjectClosure* is_alive_closure) : _is_alive_closure(is_alive_closure), _found_dead(false) {}
+  template <typename T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      if (!_is_alive_closure->do_object_b(obj)) {
+        _found_dead = true;
+      }
+    }
+  }
+  void do_oop(oop* p)       { do_oop_work<oop>(p); }
+  void do_oop(narrowOop* p) { do_oop_work<narrowOop>(p); }
+  bool found_dead()         { return _found_dead; }
+};
+#endif
+
+oop ClassLoaderData::keep_alive_object() const {
+  assert(!keep_alive(), "Don't use with CLDs that are artificially kept alive");
+  return is_anonymous() ? _klasses->java_mirror() : class_loader();
+}
+
 bool ClassLoaderData::is_alive(BoolObjectClosure* is_alive_closure) const {
-  bool alive =
-    is_anonymous() ?
-       is_alive_closure->do_object_b(_klasses->java_mirror()) :
-       class_loader() == NULL || is_alive_closure->do_object_b(class_loader());
-  assert(!alive || claimed(), "must be claimed");
+  bool alive = keep_alive() // null class loader and incomplete anonymous klasses.
+      || is_alive_closure->do_object_b(keep_alive_object());
+
+#ifdef ASSERT
+  if (alive) {
+    AllAliveClosure all_alive_closure(is_alive_closure);
+    KlassToOopClosure klass_closure(&all_alive_closure);
+    const_cast<ClassLoaderData*>(this)->oops_do(&all_alive_closure, &klass_closure, false);
+    assert(!all_alive_closure.found_dead(), err_msg("Found dead oop in alive cld: " PTR_FORMAT, p2i(this)));
+  }
+#endif
+
   return alive;
 }
 
@@ -601,11 +638,36 @@
 
 void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
   if (ClassUnloading) {
-    ClassLoaderData::the_null_class_loader_data()->oops_do(f, klass_closure, must_claim);
-    // keep any special CLDs alive.
-    ClassLoaderDataGraph::keep_alive_oops_do(f, klass_closure, must_claim);
+    keep_alive_oops_do(f, klass_closure, must_claim);
   } else {
-    ClassLoaderDataGraph::oops_do(f, klass_closure, must_claim);
+    oops_do(f, klass_closure, must_claim);
+  }
+}
+
+void ClassLoaderDataGraph::cld_do(CLDClosure* cl) {
+  for (ClassLoaderData* cld = _head; cl != NULL && cld != NULL; cld = cld->next()) {
+    cl->do_cld(cld);
+  }
+}
+
+void ClassLoaderDataGraph::roots_cld_do(CLDClosure* strong, CLDClosure* weak) {
+  for (ClassLoaderData* cld = _head;  cld != NULL; cld = cld->_next) {
+    CLDClosure* closure = cld->keep_alive() ? strong : weak;
+    if (closure != NULL) {
+      closure->do_cld(cld);
+    }
+  }
+}
+
+void ClassLoaderDataGraph::keep_alive_cld_do(CLDClosure* cl) {
+  roots_cld_do(cl, NULL);
+}
+
+void ClassLoaderDataGraph::always_strong_cld_do(CLDClosure* cl) {
+  if (ClassUnloading) {
+    keep_alive_cld_do(cl);
+  } else {
+    cld_do(cl);
   }
 }
 
@@ -660,6 +722,16 @@
   return array;
 }
 
+bool ClassLoaderDataGraph::unload_list_contains(const void* x) {
+  assert(SafepointSynchronize::is_at_safepoint(), "only safe to call at safepoint");
+  for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) {
+    if (cld->metaspace_or_null() != NULL && cld->metaspace_or_null()->contains(x)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 #ifndef PRODUCT
 bool ClassLoaderDataGraph::contains_loader_data(ClassLoaderData* loader_data) {
   for (ClassLoaderData* data = _head; data != NULL; data = data->next()) {
@@ -689,7 +761,7 @@
   bool has_redefined_a_class = JvmtiExport::has_redefined_a_class();
   MetadataOnStackMark md_on_stack;
   while (data != NULL) {
-    if (data->keep_alive() || data->is_alive(is_alive_closure)) {
+    if (data->is_alive(is_alive_closure)) {
       if (has_redefined_a_class) {
         data->classes_do(InstanceKlass::purge_previous_versions);
       }
@@ -780,6 +852,60 @@
   return _rw_metaspace;
 }
 
+ClassLoaderDataGraphKlassIteratorAtomic::ClassLoaderDataGraphKlassIteratorAtomic()
+    : _next_klass(NULL) {
+  ClassLoaderData* cld = ClassLoaderDataGraph::_head;
+  Klass* klass = NULL;
+
+  // Find the first klass in the CLDG.
+  while (cld != NULL) {
+    klass = cld->_klasses;
+    if (klass != NULL) {
+      _next_klass = klass;
+      return;
+    }
+    cld = cld->next();
+  }
+}
+
+Klass* ClassLoaderDataGraphKlassIteratorAtomic::next_klass_in_cldg(Klass* klass) {
+  Klass* next = klass->next_link();
+  if (next != NULL) {
+    return next;
+  }
+
+  // No more klasses in the current CLD. Time to find a new CLD.
+  ClassLoaderData* cld = klass->class_loader_data();
+  while (next == NULL) {
+    cld = cld->next();
+    if (cld == NULL) {
+      break;
+    }
+    next = cld->_klasses;
+  }
+
+  return next;
+}
+
+Klass* ClassLoaderDataGraphKlassIteratorAtomic::next_klass() {
+  Klass* head = (Klass*)_next_klass;
+  // Retry until this caller claims a klass or the iterator is exhausted.
+  while (head != NULL) {
+    Klass* next = next_klass_in_cldg(head);
+    // Try to advance the shared cursor from head to its successor.
+    Klass* old_head = (Klass*)Atomic::cmpxchg_ptr(next, &_next_klass, head);
+
+    if (old_head == head) {
+      return head; // Won the CAS.
+    }
+    // Lost the CAS: continue from the cursor value that was observed instead.
+    head = old_head;
+  }
+
+  // Nothing more for the iterator to hand out.
+  assert(head == NULL, err_msg("head is " PTR_FORMAT ", expected null", p2i(head)));
+  return NULL;
+}
 
 ClassLoaderDataGraphMetaspaceIterator::ClassLoaderDataGraphMetaspaceIterator() {
   _data = ClassLoaderDataGraph::_head;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/classLoaderData.hpp
--- a/src/share/vm/classfile/classLoaderData.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/classLoaderData.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -31,7 +31,6 @@
 #include "memory/metaspaceCounters.hpp"
 #include "runtime/mutex.hpp"
 #include "utilities/growableArray.hpp"
-
 #if INCLUDE_TRACE
 # include "utilities/ticks.hpp"
 #endif
@@ -59,6 +58,7 @@
 class ClassLoaderDataGraph : public AllStatic {
   friend class ClassLoaderData;
   friend class ClassLoaderDataGraphMetaspaceIterator;
+  friend class ClassLoaderDataGraphKlassIteratorAtomic;
   friend class VMStructs;
  private:
   // All CLDs (except the null CLD) can be reached by walking _head->_next->...
@@ -75,9 +75,16 @@
   static ClassLoaderData* find_or_create(Handle class_loader, TRAPS);
   static void purge();
   static void clear_claimed_marks();
+  // oops do
   static void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
+  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
   static void always_strong_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
-  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
+  // cld do
+  static void cld_do(CLDClosure* cl);
+  static void roots_cld_do(CLDClosure* strong, CLDClosure* weak);
+  static void keep_alive_cld_do(CLDClosure* cl);
+  static void always_strong_cld_do(CLDClosure* cl);
+  // klass do
   static void classes_do(KlassClosure* klass_closure);
   static void classes_do(void f(Klass* const));
   static void loaded_classes_do(KlassClosure* klass_closure);
@@ -102,6 +109,7 @@
   static void dump() { dump_on(tty); }
   static void verify();
 
+  static bool unload_list_contains(const void* x);
 #ifndef PRODUCT
   static bool contains_loader_data(ClassLoaderData* loader_data);
 #endif
@@ -134,6 +142,7 @@
   };
 
   friend class ClassLoaderDataGraph;
+  friend class ClassLoaderDataGraphKlassIteratorAtomic;
   friend class ClassLoaderDataGraphMetaspaceIterator;
   friend class MetaDataFactory;
   friend class Method;
@@ -149,7 +158,7 @@
                            // classes in the class loader are allocated.
   Mutex* _metaspace_lock;  // Locks the metaspace for allocations and setup.
   bool _unloading;         // true if this class loader goes away
-  bool _keep_alive;        // if this CLD can be unloaded for anonymous loaders
+  bool _keep_alive;        // if this CLD is kept alive without a keep_alive_object().
   bool _is_anonymous;      // if this CLD is for an anonymous class
   volatile int _claimed;   // true if claimed, for example during GC traces.
                            // To avoid applying oop closure more than once.
@@ -195,7 +204,6 @@
 
   void unload();
   bool keep_alive() const       { return _keep_alive; }
-  bool is_alive(BoolObjectClosure* is_alive_closure) const;
   void classes_do(void f(Klass*));
   void loaded_classes_do(KlassClosure* klass_closure);
   void classes_do(void f(InstanceKlass*));
@@ -207,6 +215,9 @@
   MetaWord* allocate(size_t size);
 
  public:
+
+  bool is_alive(BoolObjectClosure* is_alive_closure) const;
+
   // Accessors
   Metaspace* metaspace_or_null() const     { return _metaspace; }
 
@@ -240,13 +251,16 @@
 
   oop class_loader() const      { return _class_loader; }
 
+  // The object the GC is using to keep this ClassLoaderData alive.
+  oop keep_alive_object() const;
+
   // Returns true if this class loader data is for a loader going away.
   bool is_unloading() const     {
     assert(!(is_the_null_class_loader_data() && _unloading), "The null class loader can never be unloaded");
     return _unloading;
   }
-  // Anonymous class loader data doesn't have anything to keep them from
-  // being unloaded during parsing the anonymous class.
+
+  // Used to make sure that this CLD is not unloaded.
   void set_keep_alive(bool value) { _keep_alive = value; }
 
   unsigned int identity_hash() {
@@ -287,6 +301,16 @@
   void initialize_shared_metaspaces();
 };
 
+// An iterator that distributes Klasses to parallel worker threads.
+class ClassLoaderDataGraphKlassIteratorAtomic : public StackObj {
+  volatile Klass* _next_klass;
+ public:
+  ClassLoaderDataGraphKlassIteratorAtomic();
+  Klass* next_klass();
+ private:
+  static Klass* next_klass_in_cldg(Klass* klass);
+};
+
 class ClassLoaderDataGraphMetaspaceIterator : public StackObj {
   ClassLoaderData* _data;
  public:
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/dictionary.cpp
--- a/src/share/vm/classfile/dictionary.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/dictionary.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "memory/iterator.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "utilities/hashtable.inline.hpp"
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
@@ -198,6 +199,26 @@
   return class_was_unloaded;
 }
 
+void Dictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  // Skip the strong roots probe marking if the closures are the same.
+  if (strong == weak) {
+    oops_do(strong);
+    return;
+  }
+
+  for (int index = 0; index < table_size(); index++) {
+    for (DictionaryEntry *probe = bucket(index);
+                          probe != NULL;
+                          probe = probe->next()) {
+      Klass* e = probe->klass();
+      ClassLoaderData* loader_data = probe->loader_data();
+      if (is_strongly_reachable(loader_data, e)) {
+        probe->set_strongly_reachable();
+      }
+    }
+  }
+  _pd_cache_table->roots_oops_do(strong, weak);
+}
 
 void Dictionary::always_strong_oops_do(OopClosure* blk) {
   // Follow all system classes and temporary placeholders in dictionary; only
@@ -489,6 +510,23 @@
   }
 }
 
+void ProtectionDomainCacheTable::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  for (int index = 0; index < table_size(); index++) {
+    for (ProtectionDomainCacheEntry* probe = bucket(index);
+                                     probe != NULL;
+                                     probe = probe->next()) {
+      if (probe->is_strongly_reachable()) {
+        probe->reset_strongly_reachable();
+        probe->oops_do(strong);
+      } else {
+        if (weak != NULL) {
+          probe->oops_do(weak);
+        }
+      }
+    }
+  }
+}
+
 uint ProtectionDomainCacheTable::bucket_size() {
   return sizeof(ProtectionDomainCacheEntry);
 }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/dictionary.hpp
--- a/src/share/vm/classfile/dictionary.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/dictionary.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -89,6 +89,7 @@
   // GC support
   void oops_do(OopClosure* f);
   void always_strong_oops_do(OopClosure* blk);
+  void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   void always_strong_classes_do(KlassClosure* closure);
 
@@ -218,6 +219,7 @@
   // GC support
   void oops_do(OopClosure* f);
   void always_strong_oops_do(OopClosure* f);
+  void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   static uint bucket_size();
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/javaClasses.cpp
--- a/src/share/vm/classfile/javaClasses.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/javaClasses.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -463,12 +463,11 @@
   return true;
 }
 
-void java_lang_String::print(Handle java_string, outputStream* st) {
-  oop          obj    = java_string();
-  assert(obj->klass() == SystemDictionary::String_klass(), "must be java_string");
-  typeArrayOop value  = java_lang_String::value(obj);
-  int          offset = java_lang_String::offset(obj);
-  int          length = java_lang_String::length(obj);
+void java_lang_String::print(oop java_string, outputStream* st) {
+  assert(java_string->klass() == SystemDictionary::String_klass(), "must be java_string");
+  typeArrayOop value  = java_lang_String::value(java_string);
+  int          offset = java_lang_String::offset(java_string);
+  int          length = java_lang_String::length(java_string);
 
   int end = MIN2(length, 100);
   if (value == NULL) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/javaClasses.hpp
--- a/src/share/vm/classfile/javaClasses.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/javaClasses.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -208,7 +208,7 @@
   }
 
   // Debugging
-  static void print(Handle java_string, outputStream* st);
+  static void print(oop java_string, outputStream* st);
   friend class JavaClasses;
 };
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/metadataOnStackMark.cpp
--- a/src/share/vm/classfile/metadataOnStackMark.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/metadataOnStackMark.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -47,8 +47,11 @@
   if (_marked_objects == NULL) {
     _marked_objects = new (ResourceObj::C_HEAP, mtClass) GrowableArray<Metadata*>(1000, true);
   }
+
   Threads::metadata_do(Metadata::mark_on_stack);
-  CodeCache::alive_nmethods_do(nmethod::mark_on_stack);
+  if (JvmtiExport::has_redefined_a_class()) {
+    CodeCache::alive_nmethods_do(nmethod::mark_on_stack);
+  }
   CompileBroker::mark_on_stack();
   JvmtiCurrentBreakpoints::metadata_do(Metadata::mark_on_stack);
   ThreadService::metadata_do(Metadata::mark_on_stack);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/stackMapFrame.cpp
--- a/src/share/vm/classfile/stackMapFrame.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/stackMapFrame.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,21 +54,6 @@
   return frame;
 }
 
-bool StackMapFrame::has_new_object() const {
-  int32_t i;
-  for (i = 0; i < _max_locals; i++) {
-    if (_locals[i].is_uninitialized()) {
-      return true;
-    }
-  }
-  for (i = 0; i < _stack_size; i++) {
-    if (_stack[i].is_uninitialized()) {
-      return true;
-    }
-  }
-  return false;
-}
-
 void StackMapFrame::initialize_object(
     VerificationType old_object, VerificationType new_object) {
   int32_t i;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/stackMapFrame.hpp
--- a/src/share/vm/classfile/stackMapFrame.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/stackMapFrame.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -155,10 +155,6 @@
     const methodHandle m, VerificationType thisKlass, TRAPS);
 
   // Search local variable type array and stack type array.
-  // Return true if an uninitialized object is found.
-  bool has_new_object() const;
-
-  // Search local variable type array and stack type array.
   // Set every element with type of old_object to new_object.
   void initialize_object(
     VerificationType old_object, VerificationType new_object);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/stackMapTable.cpp
--- a/src/share/vm/classfile/stackMapTable.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/stackMapTable.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -70,24 +70,26 @@
 
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const {
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const {
   int index = get_index_from_offset(target);
-  return match_stackmap(frame, target, index, match, update, ctx, THREAD);
+  return match_stackmap(frame, target, index, match, update, handler, ctx, THREAD);
 }
 
 // Match and/or update current_frame to the frame in stackmap table with
 // specified offset and frame index. Return true if the two frames match.
+// handler is true if the frame in stackmap_table is for an exception handler.
 //
+// The values of match, update and handler are:         _match__update__handler
+// The values of match and update are:                  _match__update__handler
 //
-// checking a branch target/exception handler:           true   false
+// checking a branch target:                             true   false   false
+// checking an exception handler:                        true   false   true
 // linear bytecode verification following an
-// unconditional branch:                                 false  true
+// unconditional branch:                                 false  true    false
 // linear bytecode verification not following an
-// unconditional branch:                                 true   true
+// unconditional branch:                                 true   true    false
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target, int32_t frame_index,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const {
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const {
   if (frame_index < 0 || frame_index >= _frame_count) {
     *ctx = ErrorContext::missing_stackmap(frame->offset());
     frame->verifier()->verify_error(
@@ -98,11 +100,9 @@
   StackMapFrame *stackmap_frame = _frame_array[frame_index];
   bool result = true;
   if (match) {
-    // when checking handler target, match == true && update == false
-    bool is_exception_handler = !update;
     // Has direct control flow from last instruction, need to match the two
     // frames.
-    result = frame->is_assignable_to(stackmap_frame, is_exception_handler,
+    result = frame->is_assignable_to(stackmap_frame, handler,
         ctx, CHECK_VERIFY_(frame->verifier(), result));
   }
   if (update) {
@@ -126,24 +126,10 @@
     StackMapFrame* frame, int32_t target, TRAPS) const {
   ErrorContext ctx;
   bool match = match_stackmap(
-    frame, target, true, false, &ctx, CHECK_VERIFY(frame->verifier()));
+    frame, target, true, false, false, &ctx, CHECK_VERIFY(frame->verifier()));
   if (!match || (target < 0 || target >= _code_length)) {
     frame->verifier()->verify_error(ctx,
         "Inconsistent stackmap frames at branch target %d", target);
-    return;
-  }
-  // check if uninitialized objects exist on backward branches
-  check_new_object(frame, target, CHECK_VERIFY(frame->verifier()));
-  frame->verifier()->update_furthest_jump(target);
-}
-
-void StackMapTable::check_new_object(
-    const StackMapFrame* frame, int32_t target, TRAPS) const {
-  if (frame->offset() > target && frame->has_new_object()) {
-    frame->verifier()->verify_error(
-        ErrorContext::bad_code(frame->offset()),
-        "Uninitialized object exists on backward branch %d", target);
-    return;
   }
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/stackMapTable.hpp
--- a/src/share/vm/classfile/stackMapTable.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/stackMapTable.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -74,12 +74,12 @@
   // specified offset. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const;
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const;
   // Match and/or update current_frame to the frame in stackmap table with
   // specified offset and frame index. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset, int32_t frame_index,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const;
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const;
 
   // Check jump instructions. Make sure there are no uninitialized
   // instances on backward branch.
@@ -90,10 +90,6 @@
   // Returns the frame array index where the frame with offset is stored.
   int get_index_from_offset(int32_t offset) const;
 
-  // Make sure that there's no uninitialized object exist on backward branch.
-  void check_new_object(
-    const StackMapFrame* frame, int32_t target, TRAPS) const;
-
   void print_on(outputStream* str) const;
 };
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/symbolTable.cpp
--- a/src/share/vm/classfile/symbolTable.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/symbolTable.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -36,6 +36,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "utilities/hashtable.inline.hpp"
 #if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #endif
 
@@ -704,11 +705,26 @@
   return lookup(chars, length);
 }
 
+// Tell the GC that this string was looked up in the StringTable.
+static void ensure_string_alive(oop string) {
+  // A lookup in the StringTable could return an object that was previously
+  // considered dead. The SATB part of G1 needs to get notified about this
+  // potential resurrection, otherwise the marking might not find the object.
+#if INCLUDE_ALL_GCS
+  if (UseG1GC && string != NULL) {
+    G1SATBCardTableModRefBS::enqueue(string);
+  }
+#endif
+}
 
 oop StringTable::lookup(jchar* name, int len) {
   unsigned int hash = hash_string(name, len);
   int index = the_table()->hash_to_index(hash);
-  return the_table()->lookup(index, name, len, hash);
+  oop string = the_table()->lookup(index, name, len, hash);
+
+  ensure_string_alive(string);
+
+  return string;
 }
 
 
@@ -719,7 +735,10 @@
   oop found_string = the_table()->lookup(index, name, len, hashValue);
 
   // Found
-  if (found_string != NULL) return found_string;
+  if (found_string != NULL) {
+    ensure_string_alive(found_string);
+    return found_string;
+  }
 
   debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
   assert(!Universe::heap()->is_in_reserved(name),
@@ -744,11 +763,17 @@
 
   // Grab the StringTable_lock before getting the_table() because it could
   // change at safepoint.
-  MutexLocker ml(StringTable_lock, THREAD);
+  oop added_or_found;
+  {
+    MutexLocker ml(StringTable_lock, THREAD);
+    // Otherwise, add the string to the table
+    added_or_found = the_table()->basic_add(index, string, name, len,
+                                  hashValue, CHECK_NULL);
+  }
 
-  // Otherwise, add to symbol to table
-  return the_table()->basic_add(index, string, name, len,
-                                hashValue, CHECK_NULL);
+  ensure_string_alive(added_or_found);
+
+  return added_or_found;
 }
 
 oop StringTable::intern(Symbol* symbol, TRAPS) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/systemDictionary.cpp
--- a/src/share/vm/classfile/systemDictionary.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/systemDictionary.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -52,6 +52,7 @@
 #include "runtime/java.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/signature.hpp"
 #include "services/classLoadingService.hpp"
 #include "services/threadService.hpp"
@@ -1612,13 +1613,7 @@
 // system dictionary and follows the remaining classes' contents.
 
 void SystemDictionary::always_strong_oops_do(OopClosure* blk) {
-  blk->do_oop(&_java_system_loader);
-  blk->do_oop(&_system_loader_lock_obj);
-
-  dictionary()->always_strong_oops_do(blk);
-
-  // Visit extra methods
-  invoke_method_table()->oops_do(blk);
+  roots_oops_do(blk, NULL);
 }
 
 void SystemDictionary::always_strong_classes_do(KlassClosure* closure) {
@@ -1685,6 +1680,17 @@
   return unloading_occurred;
 }
 
+void SystemDictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  strong->do_oop(&_java_system_loader);
+  strong->do_oop(&_system_loader_lock_obj);
+
+  // Adjust dictionary
+  dictionary()->roots_oops_do(strong, weak);
+
+  // Visit extra methods
+  invoke_method_table()->oops_do(strong);
+}
+
 void SystemDictionary::oops_do(OopClosure* f) {
   f->do_oop(&_java_system_loader);
   f->do_oop(&_system_loader_lock_obj);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/systemDictionary.hpp
--- a/src/share/vm/classfile/systemDictionary.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/systemDictionary.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -335,6 +335,7 @@
 
   // Applies "f->do_oop" to all root oops in the system dictionary.
   static void oops_do(OopClosure* f);
+  static void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   // System loader lock
   static oop system_loader_lock()           { return _system_loader_lock_obj; }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/verifier.cpp
--- a/src/share/vm/classfile/verifier.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/verifier.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -43,7 +43,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/javaCalls.hpp"
-#include "runtime/orderAccess.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 #ifdef TARGET_ARCH_x86
 # include "bytes_x86.hpp"
@@ -634,8 +634,6 @@
                                 // flow from current instruction to the next
                                 // instruction in sequence
 
-  set_furthest_jump(0);
-
   Bytecodes::Code opcode;
   while (!bcs.is_last_bytecode()) {
     // Check for recursive re-verification before each bytecode.
@@ -1794,7 +1792,7 @@
       // If matched, current_frame will be updated by this method.
       bool matches = stackmap_table->match_stackmap(
         current_frame, this_offset, stackmap_index,
-        !no_control_flow, true, &ctx, CHECK_VERIFY_(this, 0));
+        !no_control_flow, true, false, &ctx, CHECK_VERIFY_(this, 0));
       if (!matches) {
         // report type error
         verify_error(ctx, "Instruction type does not match stack map");
@@ -1841,7 +1839,7 @@
       }
       ErrorContext ctx;
       bool matches = stackmap_table->match_stackmap(
-        new_frame, handler_pc, true, false, &ctx, CHECK_VERIFY(this));
+        new_frame, handler_pc, true, false, true, &ctx, CHECK_VERIFY(this));
       if (!matches) {
         verify_error(ctx, "Stack map does not match the one at "
             "exception handler %d", handler_pc);
@@ -2252,13 +2250,6 @@
       return;
     }
 
-    // Make sure that this call is not jumped over.
-    if (bci < furthest_jump()) {
-      verify_error(ErrorContext::bad_code(bci),
-                   "Bad <init> method call from inside of a branch");
-      return;
-    }
-
     // Make sure that this call is not done from within a TRY block because
     // that can result in returning an incomplete object.  Simply checking
     // (bci >= start_pc) also ensures that this call is not done after a TRY
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/verifier.hpp
--- a/src/share/vm/classfile/verifier.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/verifier.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -258,9 +258,6 @@
 
   ErrorContext _error_context;  // contains information about an error
 
-  // Used to detect illegal jumps over calls to super() nd this() in ctors.
-  int32_t _furthest_jump;
-
   void verify_method(methodHandle method, TRAPS);
   char* generate_code_data(methodHandle m, u4 code_length, TRAPS);
   void verify_exception_handler_table(u4 code_length, char* code_data,
@@ -407,19 +404,6 @@
 
   TypeOrigin ref_ctx(const char* str, TRAPS);
 
-  // Keep track of the furthest branch done in a method to make sure that
-  // there are no branches over calls to super() or this() from inside of
-  // a constructor.
-  int32_t furthest_jump() { return _furthest_jump; }
-
-  void set_furthest_jump(int32_t target) {
-    _furthest_jump = target;
-  }
-
-  void update_furthest_jump(int32_t target) {
-    if (target > _furthest_jump) _furthest_jump = target;
-  }
-
 };
 
 inline int ClassVerifier::change_sig_to_verificationType(
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/classfile/vmSymbols.hpp
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/classfile/vmSymbols.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -789,6 +789,26 @@
    do_name(     decrypt_name,                                      "decrypt")                                           \
    do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
+  /* support for sun.security.provider.SHA */                                                                           \
+  do_class(sun_security_provider_sha,                              "sun/security/provider/SHA")                         \
+  do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R)            \
+   do_name(     implCompress_name,                                 "implCompress")                                      \
+   do_signature(implCompress_signature,                            "([BI)V")                                            \
+                                                                                                                        \
+  /* support for sun.security.provider.SHA2 */                                                                          \
+  do_class(sun_security_provider_sha2,                             "sun/security/provider/SHA2")                        \
+  do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R)          \
+                                                                                                                        \
+  /* support for sun.security.provider.SHA5 */                                                                          \
+  do_class(sun_security_provider_sha5,                             "sun/security/provider/SHA5")                        \
+  do_intrinsic(_sha5_implCompress, sun_security_provider_sha5, implCompress_name, implCompress_signature, F_R)          \
+                                                                                                                        \
+  /* support for sun.security.provider.DigestBase */                                                                    \
+  do_class(sun_security_provider_digestbase,                       "sun/security/provider/DigestBase")                  \
+  do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R)   \
+   do_name(     implCompressMB_name,                               "implCompressMultiBlock")                            \
+   do_signature(implCompressMB_signature,                          "([BII)I")                                           \
+                                                                                                                        \
   /* support for java.util.zip */                                                                                       \
   do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
   do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/codeCache.cpp
--- a/src/share/vm/code/codeCache.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/codeCache.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -337,6 +337,11 @@
 // Walk the list of methods which might contain non-perm oops.
 void CodeCache::scavenge_root_nmethods_do(CodeBlobClosure* f) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   debug_only(mark_scavenge_root_nmethods());
 
   for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -362,6 +367,11 @@
 
 void CodeCache::add_scavenge_root_nmethod(nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   nm->set_on_scavenge_root_list();
   nm->set_scavenge_root_link(_scavenge_root_nmethods);
   set_scavenge_root_nmethods(nm);
@@ -370,6 +380,11 @@
 
 void CodeCache::drop_scavenge_root_nmethod(nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   print_trace("drop_scavenge_root", nm);
   nmethod* last = NULL;
   nmethod* cur = scavenge_root_nmethods();
@@ -391,6 +406,11 @@
 
 void CodeCache::prune_scavenge_root_nmethods() {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   debug_only(mark_scavenge_root_nmethods());
 
   nmethod* last = NULL;
@@ -423,6 +443,10 @@
 
 #ifndef PRODUCT
 void CodeCache::asserted_non_scavengable_nmethods_do(CodeBlobClosure* f) {
+  if (UseG1GC) {
+    return;
+  }
+
   // While we are here, verify the integrity of the list.
   mark_scavenge_root_nmethods();
   for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -463,9 +487,36 @@
 }
 #endif //PRODUCT
 
+void CodeCache::verify_clean_inline_caches() {  // Debug-only check: the body is compiled out unless ASSERT is defined.
+#ifdef ASSERT
+  FOR_ALL_ALIVE_BLOBS(cb) {
+    if (cb->is_nmethod()) {  // only nmethods are verified; other blobs are skipped
+      nmethod* nm = (nmethod*)cb;
+      assert(!nm->is_unloaded(), "Tautology");  // presumably implied by the alive-blob iteration -- TODO confirm
+      nm->verify_clean_inline_caches();  // asserts that call sites targeting stale nmethods are clean
+      nm->verify();
+    }
+  }
+#endif
+}
+
+void CodeCache::verify_icholder_relocations() {  // Debug-only check: the body is compiled out unless ASSERT is defined.
+#ifdef ASSERT
+  // Make sure that we aren't leaking icholders: call-site, pending and not-claimed counts must add up to the live count.
+  int count = 0;  // icholder call sites summed over every nmethod in the cache
+  FOR_ALL_BLOBS(cb) {
+    if (cb->is_nmethod()) {
+      nmethod* nm = (nmethod*)cb;
+      count += nm->verify_icholder_relocations();
+    }
+  }
+
+  assert(count + InlineCacheBuffer::pending_icholder_count() + CompiledICHolder::live_not_claimed_count() ==
+         CompiledICHolder::live_count(), "must agree");
+#endif
+}
 
 void CodeCache::gc_prologue() {
-  assert(!nmethod::oops_do_marking_is_active(), "oops_do_marking_epilogue must be called");
 }
 
 void CodeCache::gc_epilogue() {
@@ -478,41 +529,15 @@
         nm->cleanup_inline_caches();
       }
       DEBUG_ONLY(nm->verify());
-      nm->fix_oop_relocations();
+      DEBUG_ONLY(nm->verify_oop_relocations());
     }
   }
   set_needs_cache_clean(false);
   prune_scavenge_root_nmethods();
-  assert(!nmethod::oops_do_marking_is_active(), "oops_do_marking_prologue must be called");
 
-#ifdef ASSERT
-  // make sure that we aren't leaking icholders
-  int count = 0;
-  FOR_ALL_BLOBS(cb) {
-    if (cb->is_nmethod()) {
-      RelocIterator iter((nmethod*)cb);
-      while(iter.next()) {
-        if (iter.type() == relocInfo::virtual_call_type) {
-          if (CompiledIC::is_icholder_call_site(iter.virtual_call_reloc())) {
-            CompiledIC *ic = CompiledIC_at(iter.reloc());
-            if (TraceCompiledIC) {
-              tty->print("noticed icholder " INTPTR_FORMAT " ", p2i(ic->cached_icholder()));
-              ic->print();
-            }
-            assert(ic->cached_icholder() != NULL, "must be non-NULL");
-            count++;
-          }
-        }
-      }
-    }
-  }
-
-  assert(count + InlineCacheBuffer::pending_icholder_count() + CompiledICHolder::live_not_claimed_count() ==
-         CompiledICHolder::live_count(), "must agree");
-#endif
+  verify_icholder_relocations();
 }
 
-
 void CodeCache::verify_oops() {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   VerifyOopClosure voc;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/codeCache.hpp
--- a/src/share/vm/code/codeCache.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/codeCache.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -134,10 +134,6 @@
   // to) any unmarked codeBlobs in the cache.  Sets "marked_for_unloading"
   // to "true" iff some code got unloaded.
   static void do_unloading(BoolObjectClosure* is_alive, bool unloading_occurred);
-  static void oops_do(OopClosure* f) {
-    CodeBlobToOopClosure oopc(f, /*do_marking=*/ false);
-    blobs_do(&oopc);
-  }
   static void asserted_non_scavengable_nmethods_do(CodeBlobClosure* f = NULL) PRODUCT_RETURN;
   static void scavenge_root_nmethods_do(CodeBlobClosure* f);
 
@@ -172,6 +168,9 @@
   static void set_needs_cache_clean(bool v)      { _needs_cache_clean = v;    }
   static void clear_inline_caches();             // clear all inline caches
 
+  static void verify_clean_inline_caches();   // ASSERT builds only: checks inline caches of all alive nmethods
+  static void verify_icholder_relocations();  // ASSERT builds only: checks CompiledICHolder accounting
+
   // Deoptimization
   static int  mark_for_deoptimization(DepChange& changes);
 #ifdef HOTSWAP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/compiledIC.cpp
--- a/src/share/vm/code/compiledIC.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/compiledIC.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -99,13 +99,13 @@
   }
 
   {
-  MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+    MutexLockerEx pl(SafepointSynchronize::is_at_safepoint() ? NULL : Patching_lock, Mutex::_no_safepoint_check_flag);
 #ifdef ASSERT
-  CodeBlob* cb = CodeCache::find_blob_unsafe(_ic_call);
-  assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
+    CodeBlob* cb = CodeCache::find_blob_unsafe(_ic_call);
+    assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
 #endif
-  _ic_call->set_destination_mt_safe(entry_point);
-}
+     _ic_call->set_destination_mt_safe(entry_point);
+  }
 
   if (is_optimized() || is_icstub) {
     // Optimized call sites don't have a cache value and ICStub call
@@ -159,6 +159,50 @@
 //-----------------------------------------------------------------------------
 // High-level access to an inline cache. Guaranteed to be MT-safe.
 
+void CompiledIC::initialize_from_iter(RelocIterator* iter) {  // Sets _is_optimized and _value from the relocation iter currently points at.
+  assert(iter->addr() == _ic_call->instruction_address(), "must find ic_call");
+
+  if (iter->type() == relocInfo::virtual_call_type) {
+    virtual_call_Relocation* r = iter->virtual_call_reloc();
+    _is_optimized = false;
+    _value = nativeMovConstReg_at(r->cached_value());  // wraps the instruction at the relocation's cached_value() address
+  } else {
+    assert(iter->type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
+    _is_optimized = true;
+    _value = NULL;  // optimized call sites have no cached value
+  }
+}
+
+CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)  // Builds the IC for 'call' by looking up its relocation entry in 'nm'.
+  : _ic_call(call)
+{
+  address ic_call = _ic_call->instruction_address();
+
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  // Search for the ic_call at the given address.
+  RelocIterator iter(nm, ic_call, ic_call+1);  // presumably restricts the scan to [ic_call, ic_call+1) -- TODO confirm
+  bool ret = iter.next();  // ret is only read by the assert on the next line
+  assert(ret == true, "relocInfo must exist at this address");
+  assert(iter.addr() == ic_call, "must find ic_call");
+
+  initialize_from_iter(&iter);
+}
+
+CompiledIC::CompiledIC(RelocIterator* iter)  // Builds the IC from an iterator already positioned on the call; no lookup is done here.
+  : _ic_call(nativeCall_at(iter->addr()))
+{
+  address ic_call = _ic_call->instruction_address();
+
+  nmethod* nm = iter->code();  // nm and ic_call are only read by the asserts below
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  initialize_from_iter(iter);
+}
 
 bool CompiledIC::set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, TRAPS) {
   assert(CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "");
@@ -485,7 +529,7 @@
 void CompiledStaticCall::set_to_clean() {
   assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
   // Reset call site
-  MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+  MutexLockerEx pl(SafepointSynchronize::is_at_safepoint() ? NULL : Patching_lock, Mutex::_no_safepoint_check_flag);
 #ifdef ASSERT
   CodeBlob* cb = CodeCache::find_blob_unsafe(this);
   assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/compiledIC.hpp
--- a/src/share/vm/code/compiledIC.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/compiledIC.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -150,6 +150,9 @@
   bool          _is_optimized;  // an optimized virtual call (i.e., no compiled IC)
 
   CompiledIC(nmethod* nm, NativeCall* ic_call);
+  CompiledIC(RelocIterator* iter);  // builds the IC from an iterator already positioned on the call's relocation
+
+  void initialize_from_iter(RelocIterator* iter);  // sets _is_optimized and _value from iter's current relocation
 
   static bool is_icholder_entry(address entry);
 
@@ -183,6 +186,7 @@
   friend CompiledIC* CompiledIC_before(nmethod* nm, address return_addr);
   friend CompiledIC* CompiledIC_at(nmethod* nm, address call_site);
   friend CompiledIC* CompiledIC_at(Relocation* call_site);
+  friend CompiledIC* CompiledIC_at(RelocIterator* reloc_iter);
 
   // This is used to release CompiledICHolder*s from nmethods that
   // are about to be freed.  The callsite might contain other stale
@@ -263,6 +267,13 @@
   return c_ic;
 }
 
+inline CompiledIC* CompiledIC_at(RelocIterator* reloc_iter) {  // Creates and verifies a CompiledIC for the call reloc_iter points at.
+  assert(reloc_iter->type() == relocInfo::virtual_call_type ||
+      reloc_iter->type() == relocInfo::opt_virtual_call_type, "wrong reloc. info");
+  CompiledIC* c_ic = new CompiledIC(reloc_iter);  // NOTE(review): allocation lifetime not visible here; presumably resource-allocated -- confirm
+  c_ic->verify();
+  return c_ic;
+}
 
 //-----------------------------------------------------------------------------
 // The CompiledStaticCall represents a call to a static method in the compiled
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/dependencies.cpp
--- a/src/share/vm/code/dependencies.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/dependencies.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,6 +32,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/copy.hpp"
 
 
@@ -406,56 +407,66 @@
 // for the sake of the compiler log, print out current dependencies:
 void Dependencies::log_all_dependencies() {
   if (log() == NULL)  return;
-  ciBaseObject* args[max_arg_count];
+  ResourceMark rm;  // presumably bounds the lifetime of the ciargs scratch arrays allocated below -- TODO confirm
   for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
     DepType dept = (DepType)deptv;
     GrowableArray<ciBaseObject*>* deps = _deps[dept];
-    if (deps->length() == 0)  continue;
+    int deplen = deps->length();  // remembered so that growth of deps during logging is caught below
+    if (deplen == 0) {
+      continue;
+    }
     int stride = dep_args(dept);
+    GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(stride);  // scratch array reused for each dependency of this type
     for (int i = 0; i < deps->length(); i += stride) {
       for (int j = 0; j < stride; j++) {
         // flush out the identities before printing
-        args[j] = deps->at(i+j);
+        ciargs->push(deps->at(i+j));
       }
-      write_dependency_to(log(), dept, stride, args);
+      write_dependency_to(log(), dept, ciargs);
+      ciargs->clear();  // empty the scratch array before gathering the next dependency
     }
+    guarantee(deplen == deps->length(), "deps array cannot grow inside nested ResourceMark scope");
   }
 }
 
 void Dependencies::write_dependency_to(CompileLog* log,
                                        DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,  // raw arguments; converted to ci objects below
                                        Klass* witness) {
   if (log == NULL) {
     return;
   }
+  ResourceMark rm;
   ciEnv* env = ciEnv::current();
-  ciBaseObject* ciargs[max_arg_count];
-  assert(nargs <= max_arg_count, "oob");
-  for (int j = 0; j < nargs; j++) {
-    if (args[j].is_oop()) {
-      ciargs[j] = env->get_object(args[j].oop_value());
+  GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(args->length());
+  for (GrowableArrayIterator<DepArgument> it = args->begin(); it != args->end(); ++it) {
+    DepArgument arg = *it;
+    if (arg.is_oop()) {
+      ciargs->push(env->get_object(arg.oop_value()));
     } else {
-      ciargs[j] = env->get_metadata(args[j].metadata_value());
+      ciargs->push(env->get_metadata(arg.metadata_value()));
     }
   }
-  Dependencies::write_dependency_to(log, dept, nargs, ciargs, witness);
+  int argslen = ciargs->length();  // remembered so that growth during the nested call is caught below
+  Dependencies::write_dependency_to(log, dept, ciargs, witness);  // delegates to the ciBaseObject* overload
+  guarantee(argslen == ciargs->length(), "ciargs array cannot grow inside nested ResourceMark scope");
 }
 
 void Dependencies::write_dependency_to(CompileLog* log,
                                        DepType dept,
-                                       int nargs, ciBaseObject* args[],
+                                       GrowableArray<ciBaseObject*>* args,
                                        Klass* witness) {
-  if (log == NULL)  return;
-  assert(nargs <= max_arg_count, "oob");
-  int argids[max_arg_count];
-  int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  int j;
-  for (j = 0; j < nargs; j++) {
-    if (args[j]->is_object()) {
-      argids[j] = log->identify(args[j]->as_object());
+  if (log == NULL) {
+    return;
+  }
+  ResourceMark rm;
+  GrowableArray<int>* argids = new GrowableArray<int>(args->length());
+  for (GrowableArrayIterator<ciBaseObject*> it = args->begin(); it != args->end(); ++it) {
+    ciBaseObject* obj = *it;
+    if (obj->is_object()) {
+      argids->push(log->identify(obj->as_object()));
     } else {
-      argids[j] = log->identify(args[j]->as_metadata());
+      argids->push(log->identify(obj->as_metadata()));
     }
   }
   if (witness != NULL) {
@@ -464,16 +475,17 @@
     log->begin_elem("dependency");
   }
   log->print(" type='%s'", dep_name(dept));
-  if (ctxkj >= 0) {
-    log->print(" ctxk='%d'", argids[ctxkj]);
+  const int ctxkj = dep_context_arg(dept);  // -1 if no context arg
+  if (ctxkj >= 0 && ctxkj < argids->length()) {
+    log->print(" ctxk='%d'", argids->at(ctxkj));
   }
   // write remaining arguments, if any.
-  for (j = 0; j < nargs; j++) {
+  for (int j = 0; j < argids->length(); j++) {
     if (j == ctxkj)  continue;  // already logged
     if (j == 1) {
-      log->print(  " x='%d'",    argids[j]);
+      log->print(  " x='%d'",    argids->at(j));
     } else {
-      log->print(" x%d='%d'", j, argids[j]);
+      log->print(" x%d='%d'", j, argids->at(j));
     }
   }
   if (witness != NULL) {
@@ -485,9 +497,12 @@
 
 void Dependencies::write_dependency_to(xmlStream* xtty,
                                        DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,
                                        Klass* witness) {
-  if (xtty == NULL)  return;
+  if (xtty == NULL) {
+    return;
+  }
+  ResourceMark rm;
   ttyLocker ttyl;
   int ctxkj = dep_context_arg(dept);  // -1 if no context arg
   if (witness != NULL) {
@@ -497,23 +512,24 @@
   }
   xtty->print(" type='%s'", dep_name(dept));
   if (ctxkj >= 0) {
-    xtty->object("ctxk", args[ctxkj].metadata_value());
+    xtty->object("ctxk", args->at(ctxkj).metadata_value());
   }
   // write remaining arguments, if any.
-  for (int j = 0; j < nargs; j++) {
+  for (int j = 0; j < args->length(); j++) {
     if (j == ctxkj)  continue;  // already logged
+    DepArgument arg = args->at(j);
     if (j == 1) {
-      if (args[j].is_oop()) {
-        xtty->object("x", args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object("x", arg.oop_value());
       } else {
-        xtty->object("x", args[j].metadata_value());
+        xtty->object("x", arg.metadata_value());
       }
     } else {
       char xn[10]; sprintf(xn, "x%d", j);
-      if (args[j].is_oop()) {
-        xtty->object(xn, args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object(xn, arg.oop_value());
       } else {
-        xtty->object(xn, args[j].metadata_value());
+        xtty->object(xn, arg.metadata_value());
       }
     }
   }
@@ -524,7 +540,7 @@
   xtty->end_elem();
 }
 
-void Dependencies::print_dependency(DepType dept, int nargs, DepArgument args[],
+void Dependencies::print_dependency(DepType dept, GrowableArray<DepArgument>* args,
                                     Klass* witness) {
   ResourceMark rm;
   ttyLocker ttyl;   // keep the following output all in one block
@@ -533,8 +549,8 @@
                 dep_name(dept));
   // print arguments
   int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  for (int j = 0; j < nargs; j++) {
-    DepArgument arg = args[j];
+  for (int j = 0; j < args->length(); j++) {
+    DepArgument arg = args->at(j);
     bool put_star = false;
     if (arg.is_null())  continue;
     const char* what;
@@ -570,31 +586,33 @@
 void Dependencies::DepStream::log_dependency(Klass* witness) {
   if (_deps == NULL && xtty == NULL)  return;  // fast cutout for runtime
   ResourceMark rm;
-  int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  const int nargs = argument_count();
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);  // one entry per argument of the current dependency
   for (int j = 0; j < nargs; j++) {
     if (type() == call_site_target_value) {
-      args[j] = argument_oop(j);
+      args->push(argument_oop(j));
     } else {
-      args[j] = argument(j);
+      args->push(argument(j));
     }
   }
+  int argslen = args->length();  // remembered so that growth during the nested call is caught below
   if (_deps != NULL && _deps->log() != NULL) {
-    Dependencies::write_dependency_to(_deps->log(),
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(_deps->log(), type(), args, witness);  // compile log available
   } else {
-    Dependencies::write_dependency_to(xtty,
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(xtty, type(), args, witness);  // otherwise write to xtty
   }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResourceMark scope");
 }
 
 void Dependencies::DepStream::print_dependency(Klass* witness, bool verbose) {
+  ResourceMark rm;
   int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
   for (int j = 0; j < nargs; j++) {
-    args[j] = argument(j);
+    args->push(argument(j));
   }
-  Dependencies::print_dependency(type(), nargs, args, witness);
+  int argslen = args->length();
+  Dependencies::print_dependency(type(), args, witness);
   if (verbose) {
     if (_code != NULL) {
       tty->print("  code: ");
@@ -602,6 +620,7 @@
       tty->cr();
     }
   }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResoureMark scope");
 }
 
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/dependencies.hpp
--- a/src/share/vm/code/dependencies.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/dependencies.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -368,20 +368,36 @@
   void copy_to(nmethod* nm);
 
   void log_all_dependencies();
-  void log_dependency(DepType dept, int nargs, ciBaseObject* args[]) {
-    write_dependency_to(log(), dept, nargs, args);
+
+  void log_dependency(DepType dept, GrowableArray<ciBaseObject*>* args) {  // Writes one dependency record to this object's log().
+    ResourceMark rm;
+    int argslen = args->length();  // remembered so that growth during the nested call is caught below
+    write_dependency_to(log(), dept, args);
+    guarantee(argslen == args->length(),
+              "args array cannot grow inside nested ResourceMark scope");
   }
+
   void log_dependency(DepType dept,
                       ciBaseObject* x0,
                       ciBaseObject* x1 = NULL,
                       ciBaseObject* x2 = NULL) {
-    if (log() == NULL)  return;
-    ciBaseObject* args[max_arg_count];
-    args[0] = x0;
-    args[1] = x1;
-    args[2] = x2;
-    assert(2 < max_arg_count, "");
-    log_dependency(dept, dep_args(dept), args);
+    if (log() == NULL) {
+      return;
+    }
+    ResourceMark rm;
+    GrowableArray<ciBaseObject*>* ciargs =
+                new GrowableArray<ciBaseObject*>(dep_args(dept));
+    assert (x0 != NULL, "no log x0");  // the first argument is mandatory
+    ciargs->push(x0);
+
+    if (x1 != NULL) {  // optional arguments are appended only when supplied
+      ciargs->push(x1);
+    }
+    if (x2 != NULL) {
+      ciargs->push(x2);
+    }
+    assert(ciargs->length() == dep_args(dept), "");  // number of non-NULL arguments must equal dep_args(dept)
+    log_dependency(dept, ciargs);  // forwards to the GrowableArray overload
   }
 
   class DepArgument : public ResourceObj {
@@ -404,20 +420,8 @@
     Metadata* metadata_value() const { assert(!_is_oop && _valid, "must be"); return (Metadata*) _value; }
   };
 
-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, ciBaseObject* args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(xmlStream* xtty,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
   static void print_dependency(DepType dept,
-                               int nargs, DepArgument args[],
+                               GrowableArray<DepArgument>* args,
                                Klass* witness = NULL);
 
  private:
@@ -426,6 +430,18 @@
 
   static Klass* ctxk_encoded_as_null(DepType dept, Metadata* x);
 
+  static void write_dependency_to(CompileLog* log,  // CompileLog variant taking ci objects
+                                  DepType dept,
+                                  GrowableArray<ciBaseObject*>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(CompileLog* log,  // CompileLog variant taking raw DepArguments
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(xmlStream* xtty,  // xmlStream variant taking raw DepArguments
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
  public:
   // Use this to iterate over an nmethod's dependency set.
   // Works on new and old dependency sets.
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/nmethod.cpp
--- a/src/share/vm/code/nmethod.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/nmethod.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -37,6 +37,7 @@
 #include "oops/methodData.hpp"
 #include "prims/jvmtiRedefineClassesTrace.hpp"
 #include "prims/jvmtiImpl.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/sweeper.hpp"
 #include "utilities/dtrace.hpp"
@@ -48,6 +49,8 @@
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
+unsigned char nmethod::_global_unloading_clock = 0;  // advanced by increase_unloading_clock(), which skips 0 on wrap-around
+
 #ifdef DTRACE_ENABLED
 
 // Only bother with this argument setup if dtrace is available
@@ -383,27 +386,30 @@
   set_exception_cache(new_entry);
 }
 
-void nmethod::remove_from_exception_cache(ExceptionCache* ec) {
+void nmethod::clean_exception_cache(BoolObjectClosure* is_alive) {  // Unlinks and deletes entries whose exception klass has a dead loader.
   ExceptionCache* prev = NULL;
   ExceptionCache* curr = exception_cache();
-  assert(curr != NULL, "nothing to remove");
-  // find the previous and next entry of ec
-  while (curr != ec) {
-    prev = curr;
-    curr = curr->next();
-    assert(curr != NULL, "ExceptionCache not found");
+
+  while (curr != NULL) {
+    ExceptionCache* next = curr->next();  // read before curr may be deleted below
+
+    Klass* ex_klass = curr->exception_type();
+    if (ex_klass != NULL && !ex_klass->is_loader_alive(is_alive)) {
+      if (prev == NULL) {
+        set_exception_cache(next);  // curr was the list head
+      } else {
+        prev->set_next(next);
+      }
+      delete curr;
+      // prev stays the same.
+    } else {
+      prev = curr;  // entry is kept (NULL klass or live loader)
+    }
+
+    curr = next;
   }
-  // now: curr == ec
-  ExceptionCache* next = curr->next();
-  if (prev == NULL) {
-    set_exception_cache(next);
-  } else {
-    prev->set_next(next);
-  }
-  delete curr;
 }
 
-
 // public method for accessing the exception cache
 // These are the public access methods.
 address nmethod::handler_for_exception_and_pc(Handle exception, address pc) {
@@ -462,6 +468,7 @@
 // Fill in default values for various flag fields
 void nmethod::init_defaults() {
   _state                      = in_use;
+  _unloading_clock            = 0;
   _marked_for_reclamation     = 0;
   _has_flushed_dependencies   = 0;
   _has_unsafe_access          = 0;
@@ -480,7 +487,11 @@
   _oops_do_mark_link       = NULL;
   _jmethod_id              = NULL;
   _osr_link                = NULL;
-  _scavenge_root_link      = NULL;
+  if (UseG1GC) {
+    _unloading_next        = NULL;
+  } else {
+    _scavenge_root_link    = NULL;
+  }
   _scavenge_root_state     = 0;
   _compiler                = NULL;
 #if INCLUDE_RTM_OPT
@@ -688,8 +699,10 @@
     _hotness_counter         = NMethodSweeper::hotness_counter_reset_val();
 
     code_buffer->copy_values_to(this);
-    if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
-      CodeCache::add_scavenge_root_nmethod(this);
+    if (ScavengeRootsInCode) {
+      if (detect_scavenge_root_oops()) {
+        CodeCache::add_scavenge_root_nmethod(this);
+      }
       Universe::heap()->register_nmethod(this);
     }
     debug_only(verify_scavenge_root_oops());
@@ -773,8 +786,10 @@
     _hotness_counter         = NMethodSweeper::hotness_counter_reset_val();
 
     code_buffer->copy_values_to(this);
-    if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
-      CodeCache::add_scavenge_root_nmethod(this);
+    if (ScavengeRootsInCode) {
+      if (detect_scavenge_root_oops()) {
+        CodeCache::add_scavenge_root_nmethod(this);
+      }
       Universe::heap()->register_nmethod(this);
     }
     DEBUG_ONLY(verify_scavenge_root_oops();)
@@ -889,8 +904,10 @@
     code_buffer->copy_values_to(this);
     debug_info->copy_to(this);
     dependencies->copy_to(this);
-    if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
-      CodeCache::add_scavenge_root_nmethod(this);
+    if (ScavengeRootsInCode) {
+      if (detect_scavenge_root_oops()) {
+        CodeCache::add_scavenge_root_nmethod(this);
+      }
       Universe::heap()->register_nmethod(this);
     }
     debug_only(verify_scavenge_root_oops());
@@ -1156,7 +1173,7 @@
     switch(iter.type()) {
       case relocInfo::virtual_call_type:
       case relocInfo::opt_virtual_call_type: {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        CompiledIC *ic = CompiledIC_at(&iter);
         // Ok, to lookup references to zombies here
         CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination());
         if( cb != NULL && cb->is_nmethod() ) {
@@ -1180,6 +1197,77 @@
   }
 }
 
+void nmethod::verify_clean_inline_caches() {  // Asserts that call sites in this nmethod that target stale nmethods are clean.
+  assert_locked_or_safepoint(CompiledIC_lock);
+
+  // If the method is not entrant or zombie then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (!is_in_use()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // This means that the low_boundary is going to be a little too high.
+    // This shouldn't matter, since oops of non-entrant methods are never used.
+    // In fact, why are we bothering to look at oops in a non-entrant method??
+  }
+
+  ResourceMark rm;
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+    switch(iter.type()) {  // only virtual, opt-virtual and static call relocations are inspected
+      case relocInfo::virtual_call_type:
+      case relocInfo::opt_virtual_call_type: {
+        CompiledIC *ic = CompiledIC_at(&iter);
+        // Ok, to lookup references to zombies here
+        CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination());
+        if( cb != NULL && cb->is_nmethod() ) {
+          nmethod* nm = (nmethod*)cb;
+          // Verify that inline caches pointing to both zombie and not_entrant methods are clean
+          if (!nm->is_in_use() || (nm->method()->code() != nm)) {  // target not in use, or no longer its method's code()
+            assert(ic->is_clean(), "IC should be clean");
+          }
+        }
+        break;
+      }
+      case relocInfo::static_call_type: {
+        CompiledStaticCall *csc = compiledStaticCall_at(iter.reloc());
+        CodeBlob *cb = CodeCache::find_blob_unsafe(csc->destination());
+        if( cb != NULL && cb->is_nmethod() ) {
+          nmethod* nm = (nmethod*)cb;
+          // Verify that inline caches pointing to both zombie and not_entrant methods are clean
+          if (!nm->is_in_use() || (nm->method()->code() != nm)) {  // same staleness test as for inline caches above
+            assert(csc->is_clean(), "IC should be clean");
+          }
+        }
+        break;
+      }
+    }
+  }
+}
+
+int nmethod::verify_icholder_relocations() {  // Returns the number of icholder call sites found in this nmethod.
+  int count = 0;
+
+  RelocIterator iter(this);
+  while(iter.next()) {
+    if (iter.type() == relocInfo::virtual_call_type) {  // other relocation types are not counted
+      if (CompiledIC::is_icholder_call_site(iter.virtual_call_reloc())) {
+        CompiledIC *ic = CompiledIC_at(&iter);
+        if (TraceCompiledIC) {
+          tty->print("noticed icholder " INTPTR_FORMAT " ", p2i(ic->cached_icholder()));
+          ic->print();
+        }
+        assert(ic->cached_icholder() != NULL, "must be non-NULL");
+        count++;
+      }
+    }
+  }
+
+  return count;
+}
+
 // This is a private interface with the sweeper.
 void nmethod::mark_as_seen_on_stack() {
   assert(is_alive(), "Must be an alive method");
@@ -1212,6 +1300,23 @@
   mdo->inc_decompile_count();
 }
 
+void nmethod::increase_unloading_clock() {  // Advances the global unsigned char clock, skipping 0 when it wraps.
+  _global_unloading_clock++;
+  if (_global_unloading_clock == 0) {
+    // _nmethods are allocated with _unloading_clock == 0,
+    // so 0 is never used as a clock value.
+    _global_unloading_clock = 1;
+  }
+}
+
+void nmethod::set_unloading_clock(unsigned char unloading_clock) {  // Stores this nmethod's clock value with a release store.
+  OrderAccess::release_store((volatile jubyte*)&_unloading_clock, unloading_clock);
+}
+
+unsigned char nmethod::unloading_clock() {  // Reads this nmethod's clock value with an acquire load.
+  return (unsigned char)OrderAccess::load_acquire((volatile jubyte*)&_unloading_clock);
+}
+
 void nmethod::make_unloaded(BoolObjectClosure* is_alive, oop cause) {
 
   post_compiled_method_unload();
@@ -1257,6 +1362,10 @@
     // for later on.
     CodeCache::set_needs_cache_clean(true);
   }
+
+  // Unregister must be done before the state change
+  Universe::heap()->unregister_nmethod(this);
+
   _state = unloaded;
 
   // Log the unloading.
@@ -1611,6 +1720,35 @@
   set_unload_reported();
 }
 
+static void clean_ic_if_metadata_is_dead(CompiledIC *ic, BoolObjectClosure *is_alive) {  // Resets 'ic' to clean unless its cached metadata is loader-alive.
+  if (ic->is_icholder_call()) {
+    // CompiledICHolder case: the IC is kept only when both the holder
+    // method's class and the holder klass still have a live loader.
+    CompiledICHolder* cichk_oop = ic->cached_icholder();
+    if (cichk_oop->holder_method()->method_holder()->is_loader_alive(is_alive) &&
+        cichk_oop->holder_klass()->is_loader_alive(is_alive)) {
+      return;
+    }
+  } else {
+    Metadata* ic_oop = ic->cached_metadata();
+    if (ic_oop != NULL) {
+      if (ic_oop->is_klass()) {
+        if (((Klass*)ic_oop)->is_loader_alive(is_alive)) {
+          return;
+        }
+      } else if (ic_oop->is_method()) {
+        if (((Method*)ic_oop)->method_holder()->is_loader_alive(is_alive)) {
+          return;
+        }
+      } else {
+        ShouldNotReachHere();  // cached metadata is expected to be a Klass or a Method
+      }
+    }
+  }
+
+  ic->set_to_clean();  // reached for dead metadata, and also when cached_metadata() is NULL
+}
+
 // This is called at the end of the strong tracing/marking phase of a
 // GC to unload an nmethod if it contains otherwise unreachable
 // oops.
@@ -1643,15 +1781,7 @@
   }
 
   // Exception cache
-  ExceptionCache* ec = exception_cache();
-  while (ec != NULL) {
-    Klass* ex_klass = ec->exception_type();
-    ExceptionCache* next_ec = ec->next();
-    if (ex_klass != NULL && !ex_klass->is_loader_alive(is_alive)) {
-      remove_from_exception_cache(ec);
-    }
-    ec = next_ec;
-  }
+  clean_exception_cache(is_alive);
 
   // If class unloading occurred we first iterate over all inline caches and
   // clear ICs where the cached oop is referring to an unloaded klass or method.
@@ -1661,32 +1791,8 @@
     RelocIterator iter(this, low_boundary);
     while(iter.next()) {
       if (iter.type() == relocInfo::virtual_call_type) {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
-        if (ic->is_icholder_call()) {
-          // The only exception is compiledICHolder oops which may
-          // yet be marked below. (We check this further below).
-          CompiledICHolder* cichk_oop = ic->cached_icholder();
-          if (cichk_oop->holder_method()->method_holder()->is_loader_alive(is_alive) &&
-              cichk_oop->holder_klass()->is_loader_alive(is_alive)) {
-            continue;
-          }
-        } else {
-          Metadata* ic_oop = ic->cached_metadata();
-          if (ic_oop != NULL) {
-            if (ic_oop->is_klass()) {
-              if (((Klass*)ic_oop)->is_loader_alive(is_alive)) {
-                continue;
-              }
-            } else if (ic_oop->is_method()) {
-              if (((Method*)ic_oop)->method_holder()->is_loader_alive(is_alive)) {
-                continue;
-              }
-            } else {
-              ShouldNotReachHere();
-            }
-          }
-        }
-        ic->set_to_clean();
+        CompiledIC *ic = CompiledIC_at(&iter);
+        clean_ic_if_metadata_is_dead(ic, is_alive);
       }
     }
   }
@@ -1724,6 +1830,175 @@
   verify_metadata_loaders(low_boundary, is_alive);
 }
 
+template <class CompiledICorStaticCall>
+static bool clean_if_nmethod_is_unloaded(CompiledICorStaticCall *ic, address addr, BoolObjectClosure *is_alive, nmethod* from) {
+  // OK to look up references to zombies here; returns true only when the call site must be postponed.
+  CodeBlob *cb = CodeCache::find_blob_unsafe(addr);
+  if (cb != NULL && cb->is_nmethod()) {
+    nmethod* nm = (nmethod*)cb;
+
+    if (nm->unloading_clock() != nmethod::global_unloading_clock()) {
+      // The target nmethod has not been processed yet: report this call site as postponed.
+      return true;
+    }
+
+    // Clean inline caches pointing to both zombie and not_entrant methods
+    if (!nm->is_in_use() || (nm->method()->code() != nm)) {
+      ic->set_to_clean();
+      assert(ic->is_clean(), err_msg("nmethod " PTR_FORMAT " not clean %s", p2i(from), from->method()->name_and_sig_as_C_string()));
+    }
+  }
+  // Target is not an nmethod, or it was already processed (and cleaned if needed): nothing is postponed.
+  return false;
+}
+
+static bool clean_if_nmethod_is_unloaded(CompiledIC *ic, BoolObjectClosure *is_alive, nmethod* from) {
+  return clean_if_nmethod_is_unloaded(ic, ic->ic_destination(), is_alive, from);  // CompiledIC overload: looks up ic->ic_destination()
+}
+
+static bool clean_if_nmethod_is_unloaded(CompiledStaticCall *csc, BoolObjectClosure *is_alive, nmethod* from) {
+  return clean_if_nmethod_is_unloaded(csc, csc->destination(), is_alive, from);  // static-call overload: looks up csc->destination()
+}
+
+bool nmethod::do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred) {
+  ResourceMark rm;
+
+  // Precondition: this nmethod is neither a zombie nor already unloaded.
+  assert(!is_zombie() && !is_unloaded(),
+         "should not call follow on zombie or unloaded nmethod");
+
+  // If the method is not entrant then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (is_not_entrant()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // (See comment above.)
+  }
+
+  // The RedefineClasses() API can cause the class unloading invariant
+  // to no longer be true. See jvmtiExport.hpp for details.
+  // Also, leave a debugging breadcrumb in local flag.
+  bool a_class_was_redefined = JvmtiExport::has_redefined_a_class();
+  if (a_class_was_redefined) {
+    // This set of the unloading_occurred flag is done before the
+    // call to post_compiled_method_unload() so that the unloading
+    // of this nmethod is reported.
+    unloading_occurred = true;
+  }
+
+  // Exception cache
+  clean_exception_cache(is_alive);
+
+  bool is_unloaded = false;  // set once can_unload() returns true; NOTE: shadows the is_unloaded() member used in the assert above
+  bool postponed = false;    // set when a call target's unloading clock differs from the global clock (target not processed yet)
+
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+
+    switch (iter.type()) {
+
+    case relocInfo::virtual_call_type:
+      if (unloading_occurred) {
+        // If class unloading occurred, clear this IC when its cached
+        // metadata is referring to an unloaded klass or method.
+        clean_ic_if_metadata_is_dead(CompiledIC_at(&iter), is_alive);
+      }
+
+      postponed |= clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::opt_virtual_call_type:
+      postponed |= clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::static_call_type:
+      postponed |= clean_if_nmethod_is_unloaded(compiledStaticCall_at(iter.reloc()), is_alive, this);
+      break;
+
+    case relocInfo::oop_type:
+      if (!is_unloaded) {
+        // Unload check (skipped for later oop relocations once can_unload() has returned true)
+        oop_Relocation* r = iter.oop_reloc();
+        // Traverse those oops directly embedded in the code.
+        // Other oops (oop_index>0) are seen as part of scopes_oops.
+        assert(1 == (r->oop_is_immediate()) +
+                  (r->oop_addr() >= oops_begin() && r->oop_addr() < oops_end()),
+              "oop must be found in exactly one place");
+        if (r->oop_is_immediate() && r->oop_value() != NULL) {
+          if (can_unload(is_alive, r->oop_addr(), unloading_occurred)) {
+            is_unloaded = true;
+          }
+        }
+      }
+      break;
+
+    }
+  }
+
+  if (is_unloaded) {
+    return postponed;
+  }
+
+  // Scopes: every entry in [oops_begin(), oops_end()) except the non-oop filler word
+  for (oop* p = oops_begin(); p < oops_end(); p++) {
+    if (*p == Universe::non_oop_word())  continue;  // skip non-oops
+    if (can_unload(is_alive, p, unloading_occurred)) {
+      is_unloaded = true;
+      break;
+    }
+  }
+
+  if (is_unloaded) {
+    return postponed;
+  }
+
+  // Ensure that all metadata is still alive
+  verify_metadata_loaders(low_boundary, is_alive);
+
+  return postponed;  // NOTE(review): presumably the caller then runs do_unloading_parallel_postponed(); caller not visible here
+}
+
+void nmethod::do_unloading_parallel_postponed(BoolObjectClosure* is_alive, bool unloading_occurred) {
+  ResourceMark rm;
+
+  // Precondition: not a zombie. Note that unloading_occurred is not consulted in this function.
+  assert(!is_zombie(),
+         "should not call follow on zombie nmethod");
+
+  // If the method is not entrant then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (is_not_entrant()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // (See comment above.)
+  }
+
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+
+    switch (iter.type()) {
+
+    case relocInfo::virtual_call_type:
+      clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);  // "postponed" result is ignored here
+      break;
+
+    case relocInfo::opt_virtual_call_type:
+      clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);  // "postponed" result is ignored here
+      break;
+
+    case relocInfo::static_call_type:
+      clean_if_nmethod_is_unloaded(compiledStaticCall_at(iter.reloc()), is_alive, this);  // "postponed" result is ignored here
+      break;
+    }
+  }
+}
+
 #ifdef ASSERT
 
 class CheckClass : AllStatic {
@@ -1770,7 +2045,7 @@
     // compiled code is maintaining a link to dead metadata.
     address static_call_addr = NULL;
     if (iter.type() == relocInfo::opt_virtual_call_type) {
-      CompiledIC* cic = CompiledIC_at(iter.reloc());
+      CompiledIC* cic = CompiledIC_at(&iter);
       if (!cic->is_call_to_interpreted()) {
         static_call_addr = iter.addr();
       }
@@ -1822,7 +2097,7 @@
         }
       } else if (iter.type() == relocInfo::virtual_call_type) {
         // Check compiledIC holders associated with this nmethod
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        CompiledIC *ic = CompiledIC_at(&iter);
         if (ic->is_icholder_call()) {
           CompiledICHolder* cichk = ic->cached_icholder();
           f(cichk->holder_method());
@@ -1940,7 +2215,7 @@
     assert(cur != NULL, "not NULL-terminated");
     nmethod* next = cur->_oops_do_mark_link;
     cur->_oops_do_mark_link = NULL;
-    cur->fix_oop_relocations();
+    cur->verify_oop_relocations();
     NOT_PRODUCT(if (TraceScavenge)  cur->print_on(tty, "oops_do, unmark"));
     cur = next;
   }
@@ -2482,6 +2757,10 @@
 };
 
 void nmethod::verify_scavenge_root_oops() {
+  if (UseG1GC) {
+    return;
+  }
+
   if (!on_scavenge_root_list()) {
     // Actually look inside, to verify the claim that it's clean.
     DebugScavengeRoot debug_scavenge_root(this);
@@ -2925,7 +3204,7 @@
     case relocInfo::virtual_call_type:
     case relocInfo::opt_virtual_call_type: {
       VerifyMutexLocker mc(CompiledIC_lock);
-      CompiledIC_at(iter.reloc())->print();
+      CompiledIC_at(&iter)->print();
       break;
     }
     case relocInfo::static_call_type:
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/code/nmethod.hpp
--- a/src/share/vm/code/nmethod.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/code/nmethod.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -69,7 +69,12 @@
   friend class VMStructs;
  private:
   enum { cache_size = 4 };
-  PcDesc* _pc_descs[cache_size]; // last cache_size pc_descs found
+  // The array elements MUST be volatile! Several threads may modify
+  // and read from the cache concurrently. find_pc_desc_internal has
+  // returned wrong results. C++ compiler (namely xlC12) may duplicate
+  // C++ field accesses if the elements are not volatile.
+  typedef PcDesc* PcDescPtr;
+  volatile PcDescPtr _pc_descs[cache_size]; // last cache_size pc_descs found
  public:
   PcDescCache() { debug_only(_pc_descs[0] = NULL); }
   void    reset_to(PcDesc* initial_pc_desc);
@@ -111,6 +116,11 @@
   friend class NMethodSweeper;
   friend class CodeCache;  // scavengable oops
  private:
+
+  // GC support to help figure out if an nmethod has been
+  // cleaned/unloaded by the current GC.
+  static unsigned char _global_unloading_clock;
+
   // Shared fields for all nmethod's
   Method*   _method;
   int       _entry_bci;        // != InvocationEntryBci if this nmethod is an on-stack replacement method
@@ -118,7 +128,13 @@
 
   // To support simple linked-list chaining of nmethods:
   nmethod*  _osr_link;         // from InstanceKlass::osr_nmethods_head
-  nmethod*  _scavenge_root_link; // from CodeCache::scavenge_root_nmethods
+
+  union {
+    // Used by G1 to chain nmethods.
+    nmethod* _unloading_next;
+    // Used by non-G1 GCs to chain nmethods.
+    nmethod* _scavenge_root_link; // from CodeCache::scavenge_root_nmethods
+  };
 
   static nmethod* volatile _oops_do_mark_nmethods;
   nmethod*        volatile _oops_do_mark_link;
@@ -180,6 +196,8 @@
   // Protected by Patching_lock
   volatile unsigned char _state;             // {alive, not_entrant, zombie, unloaded}
 
+  volatile unsigned char _unloading_clock;   // Incremented after GC unloaded/cleaned the nmethod
+
 #ifdef ASSERT
   bool _oops_are_stale;  // indicates that it's no longer safe to access oops section
 #endif
@@ -437,6 +455,15 @@
   bool  unload_reported()                         { return _unload_reported; }
   void  set_unload_reported()                     { _unload_reported = true; }
 
+  void set_unloading_next(nmethod* next)          { _unloading_next = next; }
+  nmethod* unloading_next()                       { return _unloading_next; }
+
+  static unsigned char global_unloading_clock()   { return _global_unloading_clock; }
+  static void increase_unloading_clock();
+
+  void set_unloading_clock(unsigned char unloading_clock);
+  unsigned char unloading_clock();
+
   bool  is_marked_for_deoptimization() const      { return _marked_for_deoptimization; }
   void  mark_for_deoptimization()                 { _marked_for_deoptimization = true; }
 
@@ -529,7 +556,7 @@
   void set_exception_cache(ExceptionCache *ec)    { _exception_cache = ec; }
   address handler_for_exception_and_pc(Handle exception, address pc);
   void add_handler_for_exception_and_pc(Handle exception, address pc, address handler);
-  void remove_from_exception_cache(ExceptionCache* ec);
+  void clean_exception_cache(BoolObjectClosure* is_alive);
 
   // implicit exceptions support
   address continuation_for_implicit_exception(address pc);
@@ -552,6 +579,10 @@
     return (addr >= code_begin() && addr < verified_entry_point());
   }
 
+  // Verify calls to dead methods have been cleaned.
+  void verify_clean_inline_caches();
+  // Verify and count cached icholder relocations.
+  int  verify_icholder_relocations();
   // Check that all metadata is still alive
   void verify_metadata_loaders(address low_boundary, BoolObjectClosure* is_alive);
 
@@ -577,6 +608,10 @@
 
   // GC support
   void do_unloading(BoolObjectClosure* is_alive, bool unloading_occurred);
+  //  The parallel versions are used by G1.
+  bool do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred);
+  void do_unloading_parallel_postponed(BoolObjectClosure* is_alive, bool unloading_occurred);
+  //  Unload a nmethod if the *root object is dead.
   bool can_unload(BoolObjectClosure* is_alive, oop* root, bool unloading_occurred);
 
   void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map,
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/compiler/methodLiveness.cpp
--- a/src/share/vm/compiler/methodLiveness.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/compiler/methodLiveness.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -475,7 +475,7 @@
     bci = 0;
   }
 
-  MethodLivenessResult answer((uintptr_t*)NULL,0);
+  MethodLivenessResult answer((BitMap::bm_word_t*)NULL,0);
 
   if (_block_count > 0) {
     if (TimeLivenessAnalysis) _time_total.start();
@@ -1000,7 +1000,7 @@
 }
 
 MethodLivenessResult MethodLiveness::BasicBlock::get_liveness_at(ciMethod* method, int bci) {
-  MethodLivenessResult answer(NEW_RESOURCE_ARRAY(uintptr_t, _analyzer->bit_map_size_words()),
+  MethodLivenessResult answer(NEW_RESOURCE_ARRAY(BitMap::bm_word_t, _analyzer->bit_map_size_words()),
                 _analyzer->bit_map_size_bits());
   answer.set_is_valid();
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/compiler/oopMap.cpp
--- a/src/share/vm/compiler/oopMap.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/compiler/oopMap.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -467,7 +467,6 @@
   assert(cb != NULL, "no codeblob");
 
   // Any reg might be saved by a safepoint handler (see generate_handler_blob).
-  const int max_saved_on_entry_reg_count = ConcreteRegisterImpl::number_of_registers;
   assert( reg_map->_update_for_id == NULL || fr->is_older(reg_map->_update_for_id),
          "already updated this map; do not 'update' it twice!" );
   debug_only(reg_map->_update_for_id = fr->id());
@@ -477,27 +476,20 @@
           !cb->caller_must_gc_arguments(reg_map->thread())),
          "include_argument_oops should already be set");
 
-  int nof_callee = 0;
-  oop*        locs[2*max_saved_on_entry_reg_count+1];
-  VMReg regs[2*max_saved_on_entry_reg_count+1];
-  // ("+1" because max_saved_on_entry_reg_count might be zero)
-
   // Scan through oopmap and find location of all callee-saved registers
   // (we do not do update in place, since info could be overwritten)
 
   address pc = fr->pc();
-
   OopMap* map  = cb->oop_map_for_return_address(pc);
-
-  assert(map != NULL, " no ptr map found");
+  assert(map != NULL, "no ptr map found");
+  DEBUG_ONLY(int nof_callee = 0;)
 
-  OopMapValue omv;
-  for(OopMapStream oms(map,OopMapValue::callee_saved_value); !oms.is_done(); oms.next()) {
-    omv = oms.current();
-    assert(nof_callee < 2*max_saved_on_entry_reg_count, "overflow");
-    regs[nof_callee] = omv.content_reg();
-    locs[nof_callee] = fr->oopmapreg_to_location(omv.reg(),reg_map);
-    nof_callee++;
+  for (OopMapStream oms(map, OopMapValue::callee_saved_value); !oms.is_done(); oms.next()) {
+    OopMapValue omv = oms.current();
+    VMReg reg = omv.content_reg();
+    oop* loc = fr->oopmapreg_to_location(omv.reg(), reg_map);
+    reg_map->set_location(reg, (address) loc);
+    DEBUG_ONLY(nof_callee++;)
   }
 
   // Check that runtime stubs save all callee-saved registers
@@ -506,11 +498,6 @@
          (nof_callee >= SAVED_ON_ENTRY_REG_COUNT || nof_callee >= C_SAVED_ON_ENTRY_REG_COUNT),
          "must save all");
 #endif // COMPILER2
-
-  // Copy found callee-saved register to reg_map
-  for(int i = 0; i < nof_callee; i++) {
-    reg_map->set_location(regs[i], (address)locs[i]);
-  }
 }
 
 //=============================================================================
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -158,7 +158,7 @@
                  " coal_deaths(" SIZE_FORMAT ")"
                  " + count(" SSIZE_FORMAT ")",
                  p2i(this), size(), _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
-                 _allocation_stats.split_births(), _allocation_stats.split_deaths(),
+                 _allocation_stats.coal_births(), _allocation_stats.split_deaths(),
                  _allocation_stats.coal_deaths(), count()));
 }
 #endif
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CMSOOPCLOSURES_HPP
 
 #include "memory/genOopClosures.hpp"
+#include "memory/iterator.hpp"
 
 /////////////////////////////////////////////////////////////////
 // Closures used by ConcurrentMarkSweepGeneration's collector
@@ -48,33 +49,13 @@
     }                                                     \
   }
 
-// Applies the given oop closure to all oops in all klasses visited.
-class CMKlassClosure : public KlassClosure {
-  friend class CMSOopClosure;
-  friend class CMSOopsInGenClosure;
-
-  OopClosure* _oop_closure;
-
-  // Used when _oop_closure couldn't be set in an initialization list.
-  void initialize(OopClosure* oop_closure) {
-    assert(_oop_closure == NULL, "Should only be called once");
-    _oop_closure = oop_closure;
-  }
+// TODO: This duplication of the MetadataAwareOopClosure class is only needed
+//       because some CMS OopClosures derive from OopsInGenClosure. It would be
+//       good to get rid of them completely.
+class MetadataAwareOopsInGenClosure: public OopsInGenClosure {
+  KlassToOopClosure _klass_closure;
  public:
-  CMKlassClosure(OopClosure* oop_closure = NULL) : _oop_closure(oop_closure) { }
-
-  void do_klass(Klass* k);
-};
-
-// The base class for all CMS marking closures.
-// It's used to proxy through the metadata to the oops defined in them.
-class CMSOopClosure: public ExtendedOopClosure {
-  CMKlassClosure      _klass_closure;
- public:
-  CMSOopClosure() : ExtendedOopClosure() {
-    _klass_closure.initialize(this);
-  }
-  CMSOopClosure(ReferenceProcessor* rp) : ExtendedOopClosure(rp) {
+  MetadataAwareOopsInGenClosure() {
     _klass_closure.initialize(this);
   }
 
@@ -87,26 +68,7 @@
   virtual void do_class_loader_data(ClassLoaderData* cld);
 };
 
-// TODO: This duplication of the CMSOopClosure class is only needed because
-//       some CMS OopClosures derive from OopsInGenClosure. It would be good
-//       to get rid of them completely.
-class CMSOopsInGenClosure: public OopsInGenClosure {
-  CMKlassClosure _klass_closure;
- public:
-  CMSOopsInGenClosure() {
-    _klass_closure.initialize(this);
-  }
-
-  virtual bool do_metadata()    { return do_metadata_nv(); }
-  inline  bool do_metadata_nv() { return true; }
-
-  virtual void do_klass(Klass* k);
-  void do_klass_nv(Klass* k);
-
-  virtual void do_class_loader_data(ClassLoaderData* cld);
-};
-
-class MarkRefsIntoClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _bitMap;
@@ -122,7 +84,7 @@
   }
 };
 
-class Par_MarkRefsIntoClosure: public CMSOopsInGenClosure {
+class Par_MarkRefsIntoClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _bitMap;
@@ -140,7 +102,7 @@
 
 // A variant of the above used in certain kinds of CMS
 // marking verification.
-class MarkRefsIntoVerifyClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoVerifyClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _verification_bm;
@@ -159,7 +121,7 @@
 };
 
 // The non-parallel version (the parallel version appears further below).
-class PushAndMarkClosure: public CMSOopClosure {
+class PushAndMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -193,7 +155,7 @@
 // synchronization (for instance, via CAS). The marking stack
 // used in the non-parallel case above is here replaced with
 // an OopTaskQueue structure to allow efficient work stealing.
-class Par_PushAndMarkClosure: public CMSOopClosure {
+class Par_PushAndMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -218,7 +180,7 @@
 };
 
 // The non-parallel version (the parallel version appears further below).
-class MarkRefsIntoAndScanClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoAndScanClosure: public MetadataAwareOopsInGenClosure {
  private:
   MemRegion          _span;
   CMSBitMap*         _bit_map;
@@ -262,7 +224,7 @@
 // stack and the bitMap are shared, so access needs to be suitably
 // sycnhronized. An OopTaskQueue structure, supporting efficient
 // workstealing, replaces a CMSMarkStack for storing grey objects.
-class Par_MarkRefsIntoAndScanClosure: public CMSOopsInGenClosure {
+class Par_MarkRefsIntoAndScanClosure: public MetadataAwareOopsInGenClosure {
  private:
   MemRegion              _span;
   CMSBitMap*             _bit_map;
@@ -291,7 +253,7 @@
 // This closure is used during the concurrent marking phase
 // following the first checkpoint. Its use is buried in
 // the closure MarkFromRootsClosure.
-class PushOrMarkClosure: public CMSOopClosure {
+class PushOrMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector*   _collector;
   MemRegion       _span;
@@ -324,7 +286,7 @@
 // This closure is used during the concurrent marking phase
 // following the first checkpoint. Its use is buried in
 // the closure Par_MarkFromRootsClosure.
-class Par_PushOrMarkClosure: public CMSOopClosure {
+class Par_PushOrMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector*    _collector;
   MemRegion        _whole_span;
@@ -364,7 +326,7 @@
 // processing phase of the CMS final checkpoint step, as
 // well as during the concurrent precleaning of the discovered
 // reference lists.
-class CMSKeepAliveClosure: public CMSOopClosure {
+class CMSKeepAliveClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   const MemRegion _span;
@@ -384,7 +346,7 @@
   inline void do_oop_nv(narrowOop* p) { CMSKeepAliveClosure::do_oop_work(p); }
 };
 
-class CMSInnerParMarkAndPushClosure: public CMSOopClosure {
+class CMSInnerParMarkAndPushClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -405,7 +367,7 @@
 // A parallel (MT) version of the above, used when
 // reference processing is parallel; the only difference
 // is in the do_oop method.
-class CMSParKeepAliveClosure: public CMSOopClosure {
+class CMSParKeepAliveClosure: public MetadataAwareOopClosure {
  private:
   MemRegion     _span;
   OopTaskQueue* _work_queue;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -44,33 +44,20 @@
   }
 }
 
-// CMSOopClosure and CMSoopsInGenClosure are duplicated,
+// MetadataAwareOopClosure and MetadataAwareOopsInGenClosure are duplicated,
 // until we get rid of OopsInGenClosure.
 
-inline void CMSOopClosure::do_klass(Klass* k)       { do_klass_nv(k); }
-inline void CMSOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
-
-inline void CMSOopClosure::do_klass_nv(Klass* k) {
+inline void MetadataAwareOopsInGenClosure::do_klass_nv(Klass* k) {
   ClassLoaderData* cld = k->class_loader_data();
   do_class_loader_data(cld);
 }
-inline void CMSOopsInGenClosure::do_klass_nv(Klass* k) {
-  ClassLoaderData* cld = k->class_loader_data();
-  do_class_loader_data(cld);
-}
+inline void MetadataAwareOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
 
-inline void CMSOopClosure::do_class_loader_data(ClassLoaderData* cld) {
-  assert(_klass_closure._oop_closure == this, "Must be");
-
-  bool claim = true;  // Must claim the class loader data before processing.
-  cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
-}
-inline void CMSOopsInGenClosure::do_class_loader_data(ClassLoaderData* cld) {
+inline void MetadataAwareOopsInGenClosure::do_class_loader_data(ClassLoaderData* cld) {
   assert(_klass_closure._oop_closure == this, "Must be");
 
   bool claim = true;  // Must claim the class loader data before processing.
   cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
 }
 
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CMSOOPCLOSURES_INLINE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,12 +33,14 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
 #include "memory/resourceArea.hpp"
+#include "memory/space.inline.hpp"
 #include "memory/universe.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/init.hpp"
 #include "runtime/java.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "utilities/copy.hpp"
 
@@ -793,53 +795,6 @@
   }
 }
 
-// Apply the given closure to each oop in the space \intersect memory region.
-void CompactibleFreeListSpace::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  assert_lock_strong(freelistLock());
-  if (is_empty()) {
-    return;
-  }
-  MemRegion cur = MemRegion(bottom(), end());
-  mr = mr.intersection(cur);
-  if (mr.is_empty()) {
-    return;
-  }
-  if (mr.equals(cur)) {
-    oop_iterate(cl);
-    return;
-  }
-  assert(mr.end() <= end(), "just took an intersection above");
-  HeapWord* obj_addr = block_start(mr.start());
-  HeapWord* t = mr.end();
-
-  SpaceMemRegionOopsIterClosure smr_blk(cl, mr);
-  if (block_is_obj(obj_addr)) {
-    // Handle first object specially.
-    oop obj = oop(obj_addr);
-    obj_addr += adjustObjectSize(obj->oop_iterate(&smr_blk));
-  } else {
-    FreeChunk* fc = (FreeChunk*)obj_addr;
-    obj_addr += fc->size();
-  }
-  while (obj_addr < t) {
-    HeapWord* obj = obj_addr;
-    obj_addr += block_size(obj_addr);
-    // If "obj_addr" is not greater than top, then the
-    // entire object "obj" is within the region.
-    if (obj_addr <= t) {
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(cl);
-      }
-    } else {
-      // "obj" extends beyond end of region
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(&smr_blk);
-      }
-      break;
-    }
-  }
-}
-
 // NOTE: In the following methods, in order to safely be able to
 // apply the closure to an object, we need to be sure that the
 // object has been initialized. We are guaranteed that an object
@@ -898,42 +853,60 @@
                                                   UpwardsObjectClosure* cl) {
   assert_locked(freelistLock());
   NOT_PRODUCT(verify_objects_initialized());
-  Space::object_iterate_mem(mr, cl);
+  assert(!mr.is_empty(), "Should be non-empty");
+  // We use MemRegion(bottom(), end()) rather than used_region() below
+  // because the two are not necessarily equal for some kinds of
+  // spaces, in particular, certain kinds of free list spaces.
+  // We could use the more complicated but more precise:
+  // MemRegion(used_region().start(), round_to(used_region().end(), CardSize))
+  // but the slight imprecision seems acceptable in the assertion check.
+  assert(MemRegion(bottom(), end()).contains(mr),
+         "Should be within used space");
+  HeapWord* prev = cl->previous();   // max address from last time
+  if (prev >= mr.end()) { // nothing to do
+    return;
+  }
+  // This assert will not work when we go from cms space to perm
+  // space, and use same closure. Easy fix deferred for later. XXX YSR
+  // assert(prev == NULL || contains(prev), "Should be within space");
+
+  bool last_was_obj_array = false;
+  HeapWord *blk_start_addr, *region_start_addr;
+  if (prev > mr.start()) {
+    region_start_addr = prev;
+    blk_start_addr    = prev;
+    // The previous invocation may have pushed "prev" beyond the
+    // last allocated block yet there may still be blocks
+    // in this region due to a particular coalescing policy.
+    // Relax the assertion so that the case where the unallocated
+    // block is maintained and "prev" is beyond the unallocated
+    // block does not cause the assertion to fire.
+    assert((BlockOffsetArrayUseUnallocatedBlock &&
+            (!is_in(prev))) ||
+           (blk_start_addr == block_start(region_start_addr)), "invariant");
+  } else {
+    region_start_addr = mr.start();
+    blk_start_addr    = block_start(region_start_addr);
+  }
+  HeapWord* region_end_addr = mr.end();
+  MemRegion derived_mr(region_start_addr, region_end_addr);
+  while (blk_start_addr < region_end_addr) {
+    const size_t size = block_size(blk_start_addr);
+    if (block_is_obj(blk_start_addr)) {
+      last_was_obj_array = cl->do_object_bm(oop(blk_start_addr), derived_mr);
+    } else {
+      last_was_obj_array = false;
+    }
+    blk_start_addr += size;
+  }
+  if (!last_was_obj_array) {
+    assert((bottom() <= blk_start_addr) && (blk_start_addr <= end()),
+           "Should be within (closed) used space");
+    assert(blk_start_addr > prev, "Invariant");
+    cl->set_previous(blk_start_addr); // min address for next time
+  }
 }
 
-// Callers of this iterator beware: The closure application should
-// be robust in the face of uninitialized objects and should (always)
-// return a correct size so that the next addr + size below gives us a
-// valid block boundary. [See for instance,
-// ScanMarkedObjectsAgainCarefullyClosure::do_object_careful()
-// in ConcurrentMarkSweepGeneration.cpp.]
-HeapWord*
-CompactibleFreeListSpace::object_iterate_careful(ObjectClosureCareful* cl) {
-  assert_lock_strong(freelistLock());
-  HeapWord *addr, *last;
-  size_t size;
-  for (addr = bottom(), last  = end();
-       addr < last; addr += size) {
-    FreeChunk* fc = (FreeChunk*)addr;
-    if (fc->is_free()) {
-      // Since we hold the free list lock, which protects direct
-      // allocation in this generation by mutators, a free object
-      // will remain free throughout this iteration code.
-      size = fc->size();
-    } else {
-      // Note that the object need not necessarily be initialized,
-      // because (for instance) the free list lock does NOT protect
-      // object initialization. The closure application below must
-      // therefore be correct in the face of uninitialized objects.
-      size = cl->do_object_careful(oop(addr));
-      if (size == 0) {
-        // An unparsable object found. Signal early termination.
-        return addr;
-      }
-    }
-  }
-  return NULL;
-}
 
 // Callers of this iterator beware: The closure application should
 // be robust in the face of uninitialized objects and should (always)
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -337,10 +337,6 @@
                      unallocated_block() : end());
   }
 
-  bool is_in(const void* p) const {
-    return used_region().contains(p);
-  }
-
   virtual bool is_free_block(const HeapWord* p) const;
 
   // Resizing support
@@ -350,7 +346,6 @@
   Mutex* freelistLock() const { return &_freelistLock; }
 
   // Iteration support
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void oop_iterate(ExtendedOopClosure* cl);
 
   void object_iterate(ObjectClosure* blk);
@@ -363,6 +358,12 @@
   // obj_is_alive() to determine whether it is safe to iterate of
   // an object.
   void safe_object_iterate(ObjectClosure* blk);
+
+  // Iterate over all objects that intersect with mr, calling "cl->do_object"
+  // on each.  There is an exception to this: if this closure has already
+  // been invoked on an object, it may skip such objects in some cases.  This is
+  // most likely to happen in an "upwards" (ascending address) iteration of
+  // MemRegions.
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
 
   // Requires that "mr" be entirely within the space.
@@ -371,11 +372,8 @@
   // terminate the iteration and return the address of the start of the
   // subregion that isn't done.  Return of "NULL" indicates that the
   // interation completed.
-  virtual HeapWord*
-       object_iterate_careful_m(MemRegion mr,
-                                ObjectClosureCareful* cl);
-  virtual HeapWord*
-       object_iterate_careful(ObjectClosureCareful* cl);
+ HeapWord* object_iterate_careful_m(MemRegion mr,
+                                    ObjectClosureCareful* cl);
 
   // Override: provides a DCTO_CL specific to this kind of space.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -49,7 +49,7 @@
 #include "memory/genCollectedHeap.hpp"
 #include "memory/genMarkSweep.hpp"
 #include "memory/genOopClosures.inline.hpp"
-#include "memory/iterator.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/padded.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/resourceArea.hpp"
@@ -59,6 +59,7 @@
 #include "runtime/globals_extension.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "services/memoryService.hpp"
 #include "services/runtimeService.hpp"
@@ -1569,11 +1570,11 @@
   }
 
   if (MetaspaceGC::should_concurrent_collect()) {
-      if (Verbose && PrintGCDetails) {
+    if (Verbose && PrintGCDetails) {
       gclog_or_tty->print("CMSCollector: collect for metadata allocation ");
-      }
-      return true;
-    }
+    }
+    return true;
+  }
 
   return false;
 }
@@ -1999,7 +2000,7 @@
   SerialOldTracer* gc_tracer = GenMarkSweep::gc_tracer();
   gc_tracer->report_gc_start(gch->gc_cause(), gc_timer->gc_start());
 
-  GCTraceTime t("CMS:MSC ", PrintGCDetails && Verbose, true, NULL);
+  GCTraceTime t("CMS:MSC ", PrintGCDetails && Verbose, true, NULL, gc_tracer->gc_id());
   if (PrintGC && Verbose && !(GCCause::is_user_requested_gc(gch->gc_cause()))) {
     gclog_or_tty->print_cr("Compact ConcurrentMarkSweepGeneration after %d "
       "collections passed to foreground collector", _full_gcs_since_conc_gc);
@@ -2509,8 +2510,10 @@
   assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
          "VM thread should have CMS token");
 
+  // The gc id is created in register_foreground_gc_start if this collection is synchronous
+  const GCId gc_id = _collectorState == InitialMarking ? GCId::peek() : _gc_tracer_cm->gc_id();
   NOT_PRODUCT(GCTraceTime t("CMS:MS (foreground) ", PrintGCDetails && Verbose,
-    true, NULL);)
+    true, NULL, gc_id);)
   if (UseAdaptiveSizePolicy) {
     size_policy()->ms_collection_begin();
   }
@@ -3025,22 +3028,21 @@
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  // Get a clear set of claim bits for the strong roots processing to work with.
+  // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
   // Mark from roots one level into CMS
   MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
-  gch->gen_process_strong_roots(_cmsGen->level(),
-                                true,   // younger gens are roots
-                                true,   // activate StrongRootsScope
-                                false,  // not scavenging
-                                SharedHeap::ScanningOption(roots_scanning_options()),
-                                &notOlder,
-                                true,   // walk code active on stacks
-                                NULL,
-                                NULL); // SSS: Provide correct closure
+  gch->gen_process_roots(_cmsGen->level(),
+                         true,   // younger gens are roots
+                         true,   // activate StrongRootsScope
+                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         should_unload_classes(),
+                         &notOlder,
+                         NULL,
+                         NULL);  // SSS: Provide correct closure
 
   // Now mark from the roots
   MarkFromRootsClosure markFromRootsClosure(this, _span,
@@ -3091,24 +3093,24 @@
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  // Get a clear set of claim bits for the strong roots processing to work with.
+  // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
   // Mark from roots one level into CMS
   MarkRefsIntoVerifyClosure notOlder(_span, verification_mark_bm(),
                                      markBitMap());
-  CMKlassClosure klass_closure(&notOlder);
+  CLDToOopClosure cld_closure(&notOlder, true);
 
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-  gch->gen_process_strong_roots(_cmsGen->level(),
-                                true,   // younger gens are roots
-                                true,   // activate StrongRootsScope
-                                false,  // not scavenging
-                                SharedHeap::ScanningOption(roots_scanning_options()),
-                                &notOlder,
-                                true,   // walk code active on stacks
-                                NULL,
-                                &klass_closure);
+
+  gch->gen_process_roots(_cmsGen->level(),
+                         true,   // younger gens are roots
+                         true,   // activate StrongRootsScope
+                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         should_unload_classes(),
+                         &notOlder,
+                         NULL,
+                         &cld_closure);
 
   // Now mark from the roots
   MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
@@ -3169,16 +3171,6 @@
 }
 
 void
-ConcurrentMarkSweepGeneration::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  if (freelistLock()->owned_by_self()) {
-    Generation::oop_iterate(mr, cl);
-  } else {
-    MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
-    Generation::oop_iterate(mr, cl);
-  }
-}
-
-void
 ConcurrentMarkSweepGeneration::oop_iterate(ExtendedOopClosure* cl) {
   if (freelistLock()->owned_by_self()) {
     Generation::oop_iterate(cl);
@@ -3305,12 +3297,10 @@
 void CMSCollector::setup_cms_unloading_and_verification_state() {
   const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
                              || VerifyBeforeExit;
-  const  int  rso           =   SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+  const  int  rso           =   SharedHeap::SO_AllCodeCache;
 
   // We set the proper root for this CMS cycle here.
   if (should_unload_classes()) {   // Should unload classes this cycle
-    remove_root_scanning_option(SharedHeap::SO_AllClasses);
-    add_root_scanning_option(SharedHeap::SO_SystemClasses);
     remove_root_scanning_option(rso);  // Shrink the root set appropriately
     set_verifying(should_verify);    // Set verification state for this cycle
     return;                            // Nothing else needs to be done at this time
@@ -3318,8 +3308,6 @@
 
   // Not unloading classes this cycle
   assert(!should_unload_classes(), "Inconsitency!");
-  remove_root_scanning_option(SharedHeap::SO_SystemClasses);
-  add_root_scanning_option(SharedHeap::SO_AllClasses);
 
   if ((!verifying() || unloaded_classes_last_cycle()) && should_verify) {
     // Include symbols, strings and code cache elements to prevent their resurrection.
@@ -3527,6 +3515,7 @@
  public:
   CMSPhaseAccounting(CMSCollector *collector,
                      const char *phase,
+                     const GCId gc_id,
                      bool print_cr = true);
   ~CMSPhaseAccounting();
 
@@ -3535,6 +3524,7 @@
   const char *_phase;
   elapsedTimer _wallclock;
   bool _print_cr;
+  const GCId _gc_id;
 
  public:
   // Not MT-safe; so do not pass around these StackObj's
@@ -3550,15 +3540,15 @@
 
 CMSPhaseAccounting::CMSPhaseAccounting(CMSCollector *collector,
                                        const char *phase,
+                                       const GCId gc_id,
                                        bool print_cr) :
-  _collector(collector), _phase(phase), _print_cr(print_cr) {
+  _collector(collector), _phase(phase), _print_cr(print_cr), _gc_id(gc_id) {
 
   if (PrintCMSStatistics != 0) {
     _collector->resetYields();
   }
   if (PrintGCDetails) {
-    gclog_or_tty->date_stamp(PrintGCDateStamps);
-    gclog_or_tty->stamp(PrintGCTimeStamps);
+    gclog_or_tty->gclog_stamp(_gc_id);
     gclog_or_tty->print_cr("[%s-concurrent-%s-start]",
       _collector->cmsGen()->short_name(), _phase);
   }
@@ -3572,8 +3562,7 @@
   _collector->stopTimer();
   _wallclock.stop();
   if (PrintGCDetails) {
-    gclog_or_tty->date_stamp(PrintGCDateStamps);
-    gclog_or_tty->stamp(PrintGCTimeStamps);
+    gclog_or_tty->gclog_stamp(_gc_id);
     gclog_or_tty->print("[%s-concurrent-%s: %3.3f/%3.3f secs]",
                  _collector->cmsGen()->short_name(),
                  _phase, _collector->timerValue(), _wallclock.seconds());
@@ -3671,7 +3660,7 @@
   setup_cms_unloading_and_verification_state();
 
   NOT_PRODUCT(GCTraceTime t("\ncheckpointRootsInitialWork",
-    PrintGCDetails && Verbose, true, _gc_timer_cm);)
+    PrintGCDetails && Verbose, true, _gc_timer_cm, _gc_tracer_cm->gc_id());)
   if (UseAdaptiveSizePolicy) {
     size_policy()->checkpoint_roots_initial_begin();
   }
@@ -3684,12 +3673,6 @@
   ResourceMark rm;
   HandleMark  hm;
 
-  FalseClosure falseClosure;
-  // In the case of a synchronous collection, we will elide the
-  // remark step, so it's important to catch all the nmethod oops
-  // in this step.
-  // The final 'true' flag to gen_process_strong_roots will ensure this.
-  // If 'async' is true, we can relax the nmethod tracing.
   MarkRefsIntoClosure notOlder(_span, &_markBitMap);
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
@@ -3735,17 +3718,16 @@
       gch->set_par_threads(0);
     } else {
       // The serial version.
-      CMKlassClosure klass_closure(&notOlder);
+      CLDToOopClosure cld_closure(&notOlder, true);
       gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-      gch->gen_process_strong_roots(_cmsGen->level(),
-                                    true,   // younger gens are roots
-                                    true,   // activate StrongRootsScope
-                                    false,  // not scavenging
-                                    SharedHeap::ScanningOption(roots_scanning_options()),
-                                    &notOlder,
-                                    true,   // walk all of code cache if (so & SO_CodeCache)
-                                    NULL,
-                                    &klass_closure);
+      gch->gen_process_roots(_cmsGen->level(),
+                             true,   // younger gens are roots
+                             true,   // activate StrongRootsScope
+                             SharedHeap::ScanningOption(roots_scanning_options()),
+                             should_unload_classes(),
+                             &notOlder,
+                             NULL,
+                             &cld_closure);
     }
   }
 
@@ -3796,7 +3778,7 @@
 
     CMSTokenSyncWithLocks ts(true, bitMapLock());
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    CMSPhaseAccounting pa(this, "mark", !PrintGCDetails);
+    CMSPhaseAccounting pa(this, "mark", _gc_tracer_cm->gc_id(), !PrintGCDetails);
     res = markFromRootsWork(asynch);
     if (res) {
       _collectorState = Precleaning;
@@ -4199,7 +4181,7 @@
   pst->all_tasks_completed();
 }
 
-class Par_ConcMarkingClosure: public CMSOopClosure {
+class Par_ConcMarkingClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   CMSConcMarkingTask* _task;
@@ -4212,7 +4194,7 @@
  public:
   Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue,
                          CMSBitMap* bit_map, CMSMarkStack* overflow_stack):
-    CMSOopClosure(collector->ref_processor()),
+    MetadataAwareOopClosure(collector->ref_processor()),
     _collector(collector),
     _task(task),
     _span(collector->_span),
@@ -4519,7 +4501,7 @@
       _start_sampling = false;
     }
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    CMSPhaseAccounting pa(this, "preclean", !PrintGCDetails);
+    CMSPhaseAccounting pa(this, "preclean", _gc_tracer_cm->gc_id(), !PrintGCDetails);
     preclean_work(CMSPrecleanRefLists1, CMSPrecleanSurvivors1);
   }
   CMSTokenSync x(true); // is cms thread
@@ -4548,7 +4530,7 @@
   // we will never do an actual abortable preclean cycle.
   if (get_eden_used() > CMSScheduleRemarkEdenSizeThreshold) {
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    CMSPhaseAccounting pa(this, "abortable-preclean", !PrintGCDetails);
+    CMSPhaseAccounting pa(this, "abortable-preclean", _gc_tracer_cm->gc_id(), !PrintGCDetails);
     // We need more smarts in the abortable preclean
     // loop below to deal with cases where allocation
     // in young gen is very very slow, and our precleaning
@@ -4693,7 +4675,7 @@
     GCTimer *gc_timer = NULL; // Currently not tracing concurrent phases
     rp->preclean_discovered_references(
           rp->is_alive_non_header(), &keep_alive, &complete_trace, &yield_cl,
-          gc_timer);
+          gc_timer, _gc_tracer_cm->gc_id());
   }
 
   if (clean_survivor) {  // preclean the active survivor space(s)
@@ -4983,7 +4965,7 @@
 }
 
 class PrecleanKlassClosure : public KlassClosure {
-  CMKlassClosure _cm_klass_closure;
+  KlassToOopClosure _cm_klass_closure;
  public:
   PrecleanKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
   void do_klass(Klass* k) {
@@ -5036,7 +5018,7 @@
       // expect it to be false and set to true
       FlagSetting fl(gch->_is_gc_active, false);
       NOT_PRODUCT(GCTraceTime t("Scavenge-Before-Remark",
-        PrintGCDetails && Verbose, true, _gc_timer_cm);)
+        PrintGCDetails && Verbose, true, _gc_timer_cm, _gc_tracer_cm->gc_id());)
       int level = _cmsGen->level() - 1;
       if (level >= 0) {
         gch->do_collection(true,        // full (i.e. force, see below)
@@ -5065,7 +5047,7 @@
 void CMSCollector::checkpointRootsFinalWork(bool asynch,
   bool clear_all_soft_refs, bool init_mark_was_synchronous) {
 
-  NOT_PRODUCT(GCTraceTime tr("checkpointRootsFinalWork", PrintGCDetails, false, _gc_timer_cm);)
+  NOT_PRODUCT(GCTraceTime tr("checkpointRootsFinalWork", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());)
 
   assert(haveFreelistLocks(), "must have free list locks");
   assert_lock_strong(bitMapLock());
@@ -5120,11 +5102,11 @@
       // the most recent young generation GC, minus those cleaned up by the
       // concurrent precleaning.
       if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
-        GCTraceTime t("Rescan (parallel) ", PrintGCDetails, false, _gc_timer_cm);
+        GCTraceTime t("Rescan (parallel) ", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
         do_remark_parallel();
       } else {
         GCTraceTime t("Rescan (non-parallel) ", PrintGCDetails, false,
-                    _gc_timer_cm);
+                    _gc_timer_cm, _gc_tracer_cm->gc_id());
         do_remark_non_parallel();
       }
     }
@@ -5137,7 +5119,7 @@
   verify_overflow_empty();
 
   {
-    NOT_PRODUCT(GCTraceTime ts("refProcessingWork", PrintGCDetails, false, _gc_timer_cm);)
+    NOT_PRODUCT(GCTraceTime ts("refProcessingWork", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());)
     refProcessingWork(asynch, clear_all_soft_refs);
   }
   verify_work_stacks_empty();
@@ -5221,7 +5203,6 @@
   _timer.start();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   Par_MarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
-  CMKlassClosure klass_closure(&par_mri_cl);
 
   // ---------- young gen roots --------------
   {
@@ -5237,17 +5218,19 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_strong_roots(_collector->_cmsGen->level(),
-                                false,     // yg was scanned above
-                                false,     // this is parallel code
-                                false,     // not scavenging
-                                SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
-                                &par_mri_cl,
-                                true,   // walk all of code cache if (so & SO_CodeCache)
-                                NULL,
-                                &klass_closure);
+
+  CLDToOopClosure cld_closure(&par_mri_cl, true);
+
+  gch->gen_process_roots(_collector->_cmsGen->level(),
+                         false,     // yg was scanned above
+                         false,     // this is parallel code
+                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         _collector->should_unload_classes(),
+                         &par_mri_cl,
+                         NULL,
+                         &cld_closure);
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5297,7 +5280,7 @@
 };
 
 class RemarkKlassClosure : public KlassClosure {
-  CMKlassClosure _cm_klass_closure;
+  KlassToOopClosure _cm_klass_closure;
  public:
   RemarkKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
   void do_klass(Klass* k) {
@@ -5374,17 +5357,17 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_strong_roots(_collector->_cmsGen->level(),
-                                false,     // yg was scanned above
-                                false,     // this is parallel code
-                                false,     // not scavenging
-                                SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
-                                &par_mrias_cl,
-                                true,   // walk all of code cache if (so & SO_CodeCache)
-                                NULL,
-                                NULL);     // The dirty klasses will be handled below
+  gch->gen_process_roots(_collector->_cmsGen->level(),
+                         false,     // yg was scanned above
+                         false,     // this is parallel code
+                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         _collector->should_unload_classes(),
+                         &par_mrias_cl,
+                         NULL,
+                         NULL);     // The dirty klasses will be handled below
+
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5437,7 +5420,7 @@
   // We might have added oops to ClassLoaderData::_handles during the
   // concurrent marking phase. These oops point to newly allocated objects
   // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the strong roots. Hence,
+  // code, or when the young collector processes the roots. Hence,
   // we don't have to revisit the _handles block during the remark phase.
 
   // ---------- rescan dirty cards ------------
@@ -5859,7 +5842,7 @@
     cms_space,
     n_workers, workers, task_queues());
 
-  // Set up for parallel process_strong_roots work.
+  // Set up for parallel process_roots work.
   gch->set_par_threads(n_workers);
   // We won't be iterating over the cards in the card table updating
   // the younger_gen cards, so we shouldn't call the following else
@@ -5868,7 +5851,7 @@
   // gch->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
 
   // The young gen rescan work will not be done as part of
-  // process_strong_roots (which currently doesn't knw how to
+  // process_roots (which currently doesn't know how to
   // parallelize such a scan), but rather will be broken up into
   // a set of parallel tasks (via the sampling that the [abortable]
   // preclean phase did of EdenSpace, plus the [two] tasks of
@@ -5922,7 +5905,7 @@
                               NULL,  // space is set further below
                               &_markBitMap, &_markStack, &mrias_cl);
   {
-    GCTraceTime t("grey object rescan", PrintGCDetails, false, _gc_timer_cm);
+    GCTraceTime t("grey object rescan", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
     // Iterate over the dirty cards, setting the corresponding bits in the
     // mod union table.
     {
@@ -5959,29 +5942,29 @@
     Universe::verify();
   }
   {
-    GCTraceTime t("root rescan", PrintGCDetails, false, _gc_timer_cm);
+    GCTraceTime t("root rescan", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
 
     verify_work_stacks_empty();
 
     gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
     GenCollectedHeap::StrongRootsScope srs(gch);
-    gch->gen_process_strong_roots(_cmsGen->level(),
-                                  true,  // younger gens as roots
-                                  false, // use the local StrongRootsScope
-                                  false, // not scavenging
-                                  SharedHeap::ScanningOption(roots_scanning_options()),
-                                  &mrias_cl,
-                                  true,   // walk code active on stacks
-                                  NULL,
-                                  NULL);  // The dirty klasses will be handled below
+
+    gch->gen_process_roots(_cmsGen->level(),
+                           true,  // younger gens as roots
+                           false, // use the local StrongRootsScope
+                           SharedHeap::ScanningOption(roots_scanning_options()),
+                           should_unload_classes(),
+                           &mrias_cl,
+                           NULL,
+                           NULL); // The dirty klasses will be handled below
 
     assert(should_unload_classes()
-           || (roots_scanning_options() & SharedHeap::SO_CodeCache),
+           || (roots_scanning_options() & SharedHeap::SO_AllCodeCache),
            "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   }
 
   {
-    GCTraceTime t("visit unhandled CLDs", PrintGCDetails, false, _gc_timer_cm);
+    GCTraceTime t("visit unhandled CLDs", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
 
     verify_work_stacks_empty();
 
@@ -6000,7 +5983,7 @@
   }
 
   {
-    GCTraceTime t("dirty klass scan", PrintGCDetails, false, _gc_timer_cm);
+    GCTraceTime t("dirty klass scan", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
 
     verify_work_stacks_empty();
 
@@ -6013,7 +5996,7 @@
   // We might have added oops to ClassLoaderData::_handles during the
   // concurrent marking phase. These oops point to newly allocated objects
   // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the strong roots. Hence,
+  // code, or when the young collector processes the roots. Hence,
   // we don't have to revisit the _handles block during the remark phase.
 
   verify_work_stacks_empty();
@@ -6068,6 +6051,8 @@
 };
 
 void CMSRefProcTaskProxy::work(uint worker_id) {
+  ResourceMark rm;
+  HandleMark hm;
   assert(_collector->_span.equals(_span), "Inconsistency in _span");
   CMSParKeepAliveClosure par_keep_alive(_collector, _span,
                                         _mark_bit_map,
@@ -6202,7 +6187,7 @@
                                 _span, &_markBitMap, &_markStack,
                                 &cmsKeepAliveClosure, false /* !preclean */);
   {
-    GCTraceTime t("weak refs processing", PrintGCDetails, false, _gc_timer_cm);
+    GCTraceTime t("weak refs processing", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
 
     ReferenceProcessorStats stats;
     if (rp->processing_is_mt()) {
@@ -6227,13 +6212,15 @@
                                         &cmsKeepAliveClosure,
                                         &cmsDrainMarkingStackClosure,
                                         &task_executor,
-                                        _gc_timer_cm);
+                                        _gc_timer_cm,
+                                        _gc_tracer_cm->gc_id());
     } else {
       stats = rp->process_discovered_references(&_is_alive_closure,
                                         &cmsKeepAliveClosure,
                                         &cmsDrainMarkingStackClosure,
                                         NULL,
-                                        _gc_timer_cm);
+                                        _gc_timer_cm,
+                                        _gc_tracer_cm->gc_id());
     }
     _gc_tracer_cm->report_gc_reference_stats(stats);
 
@@ -6244,7 +6231,7 @@
 
   if (should_unload_classes()) {
     {
-      GCTraceTime t("class unloading", PrintGCDetails, false, _gc_timer_cm);
+      GCTraceTime t("class unloading", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
 
       // Unload classes and purge the SystemDictionary.
       bool purged_class = SystemDictionary::do_unloading(&_is_alive_closure);
@@ -6257,19 +6244,18 @@
     }
 
     {
-      GCTraceTime t("scrub symbol table", PrintGCDetails, false, _gc_timer_cm);
+      GCTraceTime t("scrub symbol table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
       // Clean up unreferenced symbols in symbol table.
       SymbolTable::unlink();
     }
-  }
-
-  // CMS doesn't use the StringTable as hard roots when class unloading is turned off.
-  // Need to check if we really scanned the StringTable.
-  if ((roots_scanning_options() & SharedHeap::SO_Strings) == 0) {
-    GCTraceTime t("scrub string table", PrintGCDetails, false, _gc_timer_cm);
-    // Delete entries for dead interned strings.
-    StringTable::unlink(&_is_alive_closure);
-  }
+
+    {
+      GCTraceTime t("scrub string table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
+      // Delete entries for dead interned strings.
+      StringTable::unlink(&_is_alive_closure);
+    }
+  }
+
 
   // Restore any preserved marks as a result of mark stack or
   // work queue overflow
@@ -6333,7 +6319,7 @@
   _intra_sweep_timer.start();
   if (asynch) {
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails);
+    CMSPhaseAccounting pa(this, "sweep", _gc_tracer_cm->gc_id(), !PrintGCDetails);
     // First sweep the old gen
     {
       CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock(),
@@ -6554,7 +6540,7 @@
     // Clear the mark bitmap (no grey objects to start with)
     // for the next cycle.
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    CMSPhaseAccounting cmspa(this, "reset", !PrintGCDetails);
+    CMSPhaseAccounting cmspa(this, "reset", _gc_tracer_cm->gc_id(), !PrintGCDetails);
 
     HeapWord* curAddr = _markBitMap.startWord();
     while (curAddr < _markBitMap.endWord()) {
@@ -6620,7 +6606,7 @@
 void CMSCollector::do_CMS_operation(CMS_op_type op, GCCause::Cause gc_cause) {
   gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
   TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-  GCTraceTime t(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, NULL);
+  GCTraceTime t(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer_cm->gc_id());
   TraceCollectorStats tcs(counters());
 
   switch (op) {
@@ -7738,7 +7724,7 @@
   CMSCollector* collector, MemRegion span,
   CMSBitMap* verification_bm, CMSBitMap* cms_bm,
   CMSMarkStack*  mark_stack):
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _span(span),
   _verification_bm(verification_bm),
@@ -7791,7 +7777,7 @@
                      MemRegion span,
                      CMSBitMap* bitMap, CMSMarkStack*  markStack,
                      HeapWord* finger, MarkFromRootsClosure* parent) :
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _span(span),
   _bitMap(bitMap),
@@ -7808,7 +7794,7 @@
                      HeapWord* finger,
                      HeapWord** global_finger_addr,
                      Par_MarkFromRootsClosure* parent) :
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _whole_span(collector->_span),
   _span(span),
@@ -7857,11 +7843,6 @@
   _overflow_stack->expand(); // expand the stack if possible
 }
 
-void CMKlassClosure::do_klass(Klass* k) {
-  assert(_oop_closure != NULL, "Not initialized?");
-  k->oops_do(_oop_closure);
-}
-
 void PushOrMarkClosure::do_oop(oop obj) {
   // Ignore mark word because we are running concurrent with mutators.
   assert(obj->is_oop_or_null(true), "expected an oop or NULL");
@@ -7959,7 +7940,7 @@
                                        CMSBitMap* mod_union_table,
                                        CMSMarkStack*  mark_stack,
                                        bool           concurrent_precleaning):
-  CMSOopClosure(rp),
+  MetadataAwareOopClosure(rp),
   _collector(collector),
   _span(span),
   _bit_map(bit_map),
@@ -8032,7 +8013,7 @@
                                                ReferenceProcessor* rp,
                                                CMSBitMap* bit_map,
                                                OopTaskQueue* work_queue):
-  CMSOopClosure(rp),
+  MetadataAwareOopClosure(rp),
   _collector(collector),
   _span(span),
   _bit_map(bit_map),
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,6 +32,7 @@
 #include "gc_implementation/shared/generationCounters.hpp"
 #include "memory/freeBlockDictionary.hpp"
 #include "memory/generation.hpp"
+#include "memory/iterator.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
 #include "services/memoryService.hpp"
@@ -1285,7 +1286,6 @@
   void save_sweep_limit();
 
   // More iteration support
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   virtual void oop_iterate(ExtendedOopClosure* cl);
   virtual void safe_object_iterate(ObjectClosure* cl);
   virtual void object_iterate(ObjectClosure* cl);
@@ -1383,13 +1383,6 @@
 // Closures of various sorts used by CMS to accomplish its work
 //
 
-// This closure is used to check that a certain set of oops is empty.
-class FalseClosure: public OopClosure {
- public:
-  void do_oop(oop* p)       { guarantee(false, "Should be an empty set"); }
-  void do_oop(narrowOop* p) { guarantee(false, "Should be an empty set"); }
-};
-
 // This closure is used to do concurrent marking from the roots
 // following the first checkpoint.
 class MarkFromRootsClosure: public BitMapClosure {
@@ -1454,7 +1447,7 @@
 
 // The following closures are used to do certain kinds of verification of
 // CMS marking.
-class PushAndMarkVerifyClosure: public CMSOopClosure {
+class PushAndMarkVerifyClosure: public MetadataAwareOopClosure {
   CMSCollector*    _collector;
   MemRegion        _span;
   CMSBitMap*       _verification_bm;
@@ -1507,6 +1500,19 @@
   }
 };
 
+// A version of ObjectClosure with "memory" (see _previous_address below)
+class UpwardsObjectClosure: public BoolObjectClosure {
+  HeapWord* _previous_address;
+ public:
+  UpwardsObjectClosure() : _previous_address(NULL) { }
+  void set_previous(HeapWord* addr) { _previous_address = addr; }
+  HeapWord* previous()              { return _previous_address; }
+  // A return value of "true" can be used by the caller to decide
+  // if this object's end should *NOT* be recorded in
+  // _previous_address above.
+  virtual bool do_object_bm(oop obj, MemRegion mr) = 0;
+};
+
 // This closure is used during the second checkpointing phase
 // to rescan the marked objects on the dirty cards in the mod
 // union table and the card table proper. It's invoked via
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmCMSOperations.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -64,7 +64,7 @@
 void VM_CMS_Operation::verify_before_gc() {
   if (VerifyBeforeGC &&
       GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
-    GCTraceTime tm("Verify Before", false, false, _collector->_gc_timer_cm);
+    GCTraceTime tm("Verify Before", false, false, _collector->_gc_timer_cm, _collector->_gc_tracer_cm->gc_id());
     HandleMark hm;
     FreelistLocker x(_collector);
     MutexLockerEx  y(_collector->bitMapLock(), Mutex::_no_safepoint_check_flag);
@@ -76,7 +76,7 @@
 void VM_CMS_Operation::verify_after_gc() {
   if (VerifyAfterGC &&
       GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
-    GCTraceTime tm("Verify After", false, false, _collector->_gc_timer_cm);
+    GCTraceTime tm("Verify After", false, false, _collector->_gc_timer_cm, _collector->_gc_tracer_cm->gc_id());
     HandleMark hm;
     FreelistLocker x(_collector);
     MutexLockerEx  y(_collector->bitMapLock(), Mutex::_no_safepoint_check_flag);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/bufferingOopClosure.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/bufferingOopClosure.hpp"
+#include "memory/iterator.hpp"
+#include "utilities/debug.hpp"
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+class TestBufferingOopClosure {
+
+  // Helper class to fake a set of oop*s and narrowOop*s.
+  class FakeRoots {
+   public:
+    // Used for sanity checking of the values passed to the do_oops functions in the test.
+    static const uintptr_t NarrowOopMarker = uintptr_t(1) << (BitsPerWord -1);
+
+    int    _num_narrow;
+    int    _num_full;
+    void** _narrow;
+    void** _full;
+
+    FakeRoots(int num_narrow, int num_full) :
+        _num_narrow(num_narrow),
+        _num_full(num_full),
+        _narrow((void**)::malloc(sizeof(void*) * num_narrow)),
+        _full((void**)::malloc(sizeof(void*) * num_full)) {
+
+      for (int i = 0; i < num_narrow; i++) {
+        _narrow[i] = (void*)(NarrowOopMarker + (uintptr_t)i);
+      }
+      for (int i = 0; i < num_full; i++) {
+        _full[i] = (void*)(uintptr_t)i;
+      }
+    }
+
+    ~FakeRoots() {
+      ::free(_narrow);
+      ::free(_full);
+    }
+
+    void oops_do_narrow_then_full(OopClosure* cl) {
+      for (int i = 0; i < _num_narrow; i++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+      for (int i = 0; i < _num_full; i++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+    }
+
+    void oops_do_full_then_narrow(OopClosure* cl) {
+      for (int i = 0; i < _num_full; i++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+      for (int i = 0; i < _num_narrow; i++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+    }
+
+    void oops_do_mixed(OopClosure* cl) { // Interleave full and narrow roots, then drain whichever array is longer.
+      int i;
+      for (i = 0; i < _num_full && i < _num_narrow; i++) {
+        cl->do_oop((oop*)_full[i]);
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+      for (int j = i; j < _num_full; j++) { // Leftover full oops; index with j so each remaining root is visited once.
+        cl->do_oop((oop*)_full[j]);
+      }
+      for (int j = i; j < _num_narrow; j++) { // Leftover narrow oops; at most one of the two tail loops runs.
+        cl->do_oop((narrowOop*)_narrow[j]);
+      }
+    }
+
+    static const int MaxOrder = 2;
+
+    void oops_do(OopClosure* cl, int do_oop_order) {
+      switch(do_oop_order) {
+        case 0:
+          oops_do_narrow_then_full(cl);
+          break;
+        case 1:
+          oops_do_full_then_narrow(cl);
+          break;
+        case 2:
+          oops_do_mixed(cl);
+          break;
+        default:
+          oops_do_narrow_then_full(cl);
+          break;
+      }
+    }
+  };
+
+  class CountOopClosure : public OopClosure {
+    int _narrow_oop_count;
+    int _full_oop_count;
+   public:
+    CountOopClosure() : _narrow_oop_count(0), _full_oop_count(0) {}
+    void do_oop(narrowOop* p) {
+      assert((uintptr_t(p) & FakeRoots::NarrowOopMarker) != 0,
+          "The narrowOop was unexpectedly not marked with the NarrowOopMarker");
+      _narrow_oop_count++;
+    }
+
+    void do_oop(oop* p){
+      assert((uintptr_t(p) & FakeRoots::NarrowOopMarker) == 0,
+          "The oop was unexpectedly marked with the NarrowOopMarker");
+      _full_oop_count++;
+    }
+
+    int narrow_oop_count() { return _narrow_oop_count; }
+    int full_oop_count()   { return _full_oop_count; }
+    int all_oop_count()    { return _narrow_oop_count + _full_oop_count; }
+  };
+
+  class DoNothingOopClosure : public OopClosure {
+   public:
+    void do_oop(narrowOop* p) {}
+    void do_oop(oop* p)       {}
+  };
+
+  static void testCount(int num_narrow, int num_full, int do_oop_order) {
+    FakeRoots fr(num_narrow, num_full);
+
+    CountOopClosure coc;
+    BufferingOopClosure boc(&coc);
+
+    fr.oops_do(&boc, do_oop_order);
+
+    boc.done();
+
+    #define assert_testCount(got, expected)                                     \
+       assert((got) == (expected),                                              \
+           err_msg("Expected: %d, got: %d, when running testCount(%d, %d, %d)", \
+               (got), (expected), num_narrow, num_full, do_oop_order))
+
+    assert_testCount(num_narrow, coc.narrow_oop_count());
+    assert_testCount(num_full, coc.full_oop_count());
+    assert_testCount(num_narrow + num_full, coc.all_oop_count());
+  }
+
+  static void testCount() { // Run the count check for every visiting order, with sizes around the buffer boundary.
+    int buffer_length = BufferingOopClosure::BufferLength;
+
+    for (int order = 0; order <= FakeRoots::MaxOrder; order++) { // MaxOrder is the last valid order (mixed), so the bound is inclusive.
+      testCount(0,                 0,                 order);
+      testCount(10,                0,                 order);
+      testCount(0,                 10,                order);
+      testCount(10,                10,                order);
+      testCount(buffer_length,     10,                order);
+      testCount(10,                buffer_length,     order);
+      testCount(buffer_length,     buffer_length,     order);
+      testCount(buffer_length + 1, 10,                order);
+      testCount(10,                buffer_length + 1, order);
+      testCount(buffer_length + 1, buffer_length,     order);
+      testCount(buffer_length,     buffer_length + 1, order);
+      testCount(buffer_length + 1, buffer_length + 1, order);
+    }
+  }
+
+  static void testIsBufferEmptyOrFull(int num_narrow, int num_full, bool expect_empty, bool expect_full) {
+    FakeRoots fr(num_narrow, num_full);
+
+    DoNothingOopClosure cl;
+    BufferingOopClosure boc(&cl);
+
+    fr.oops_do(&boc, 0);
+
+    #define assert_testIsBufferEmptyOrFull(got, expected)                             \
+        assert((got) == (expected),                                                   \
+            err_msg("Expected: %d, got: %d. testIsBufferEmptyOrFull(%d, %d, %s, %s)", \
+                (got), (expected), num_narrow, num_full,                              \
+                BOOL_TO_STR(expect_empty), BOOL_TO_STR(expect_full)))
+
+    assert_testIsBufferEmptyOrFull(expect_empty, boc.is_buffer_empty());
+    assert_testIsBufferEmptyOrFull(expect_full, boc.is_buffer_full());
+  }
+
+  static void testIsBufferEmptyOrFull() { // Checks is_buffer_empty()/is_buffer_full() after feeding roots without calling done().
+    int bl = BufferingOopClosure::BufferLength;
+
+    testIsBufferEmptyOrFull(0,       0, true,  false);
+    testIsBufferEmptyOrFull(1,       0, false, false);
+    testIsBufferEmptyOrFull(0,       1, false, false);
+    testIsBufferEmptyOrFull(1,       1, false, false);
+    testIsBufferEmptyOrFull(10,      0, false, false);
+    testIsBufferEmptyOrFull(0,      10, false, false);
+    testIsBufferEmptyOrFull(10,     10, false, false);
+    testIsBufferEmptyOrFull(0,      bl, false, true);
+    testIsBufferEmptyOrFull(bl,      0, false, true);
+    testIsBufferEmptyOrFull(bl/2, bl/2, false, true);
+    testIsBufferEmptyOrFull(bl-1,    1, false, true);
+    testIsBufferEmptyOrFull(1,    bl-1, false, true);
+    // Overflow: the (bl+1)th add first flushes the full buffer, so bl+1 leaves 1 entry and bl*2 leaves bl entries.
+    testIsBufferEmptyOrFull(bl+1,    0, false, false);
+    testIsBufferEmptyOrFull(bl*2,    0, false, true);
+  }
+
+  static void testEmptyAfterDone(int num_narrow, int num_full) {
+    FakeRoots fr(num_narrow, num_full);
+
+    DoNothingOopClosure cl;
+    BufferingOopClosure boc(&cl);
+
+    fr.oops_do(&boc, 0);
+
+    // Make sure all get processed.
+    boc.done();
+
+    assert(boc.is_buffer_empty(),
+        err_msg("Should be empty after call to done(). testEmptyAfterDone(%d, %d)",
+            num_narrow, num_full));
+  }
+
+  static void testEmptyAfterDone() { // Checks that done() leaves the buffer empty for sizes around the buffer boundary.
+    int bl = BufferingOopClosure::BufferLength;
+
+    testEmptyAfterDone(0,       0);
+    testEmptyAfterDone(1,       0);
+    testEmptyAfterDone(0,       1);
+    testEmptyAfterDone(1,       1);
+    testEmptyAfterDone(10,      0);
+    testEmptyAfterDone(0,      10);
+    testEmptyAfterDone(10,     10);
+    testEmptyAfterDone(0,      bl);
+    testEmptyAfterDone(bl,      0);
+    testEmptyAfterDone(bl/2, bl/2);
+    testEmptyAfterDone(bl-1,    1);
+    testEmptyAfterDone(1,    bl-1);
+    // More roots than fit: the buffer has already been flushed once before done() is called.
+    testEmptyAfterDone(bl+1,    0);
+    testEmptyAfterDone(bl*2,    0);
+  }
+
+  public:
+  static void test() {
+    testCount();
+    testIsBufferEmptyOrFull();
+    testEmptyAfterDone();
+  }
+};
+
+void TestBufferingOopClosure_test() {
+  TestBufferingOopClosure::test();
+}
+
+#endif
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp
--- a/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,10 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_BUFFERINGOOPCLOSURE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_BUFFERINGOOPCLOSURE_HPP
 
-#include "memory/genOopClosures.hpp"
-#include "memory/generation.hpp"
+#include "memory/iterator.hpp"
+#include "oops/oopsHierarchy.hpp"
 #include "runtime/os.hpp"
-#include "utilities/taskqueue.hpp"
+#include "utilities/debug.hpp"
 
 // A BufferingOops closure tries to separate out the cost of finding roots
 // from the cost of applying closures to them.  It maintains an array of
@@ -41,60 +41,103 @@
 // The caller must be sure to call "done" to process any unprocessed
 // buffered entriess.
 
-class Generation;
-class HeapRegion;
-
 class BufferingOopClosure: public OopClosure {
+  friend class TestBufferingOopClosure;
 protected:
-  enum PrivateConstants {
-    BufferLength = 1024
-  };
+  static const size_t BufferLength = 1024;
 
-  StarTask  _buffer[BufferLength];
-  StarTask* _buffer_top;
-  StarTask* _buffer_curr;
+  // We need to know if the buffered addresses contain oops or narrowOops.
+  // We can't tag the addresses the way StarTask does, because we need to
+  // be able to handle unaligned addresses coming from oops embedded in code.
+  //
+  // The addresses for the full-sized oops are filled in from the bottom,
+  // while the addresses for the narrowOops are filled in from the top.
+  OopOrNarrowOopStar  _buffer[BufferLength];
+  OopOrNarrowOopStar* _oop_top;
+  OopOrNarrowOopStar* _narrowOop_bottom;
 
   OopClosure* _oc;
   double      _closure_app_seconds;
 
-  void process_buffer () {
+
+  bool is_buffer_empty() {
+    return _oop_top == _buffer && _narrowOop_bottom == (_buffer + BufferLength - 1);
+  }
+
+  bool is_buffer_full() {
+    return _narrowOop_bottom < _oop_top;
+  }
+
+  // Process addresses containing full-sized oops.
+  void process_oops() {
+    for (OopOrNarrowOopStar* curr = _buffer; curr < _oop_top; ++curr) {
+      _oc->do_oop((oop*)(*curr));
+    }
+    _oop_top = _buffer;
+  }
+
+  // Process addresses containing narrow oops.
+  void process_narrowOops() {
+    for (OopOrNarrowOopStar* curr = _buffer + BufferLength - 1; curr > _narrowOop_bottom; --curr) {
+      _oc->do_oop((narrowOop*)(*curr));
+    }
+    _narrowOop_bottom = _buffer + BufferLength - 1;
+  }
+
+  // Apply the closure to all oops and clear the buffer.
+  // Accumulate the time it took.
+  void process_buffer() {
     double start = os::elapsedTime();
-    for (StarTask* curr = _buffer; curr < _buffer_curr; ++curr) {
-      if (curr->is_narrow()) {
-        assert(UseCompressedOops, "Error");
-        _oc->do_oop((narrowOop*)(*curr));
-      } else {
-        _oc->do_oop((oop*)(*curr));
-      }
-    }
-    _buffer_curr = _buffer;
+
+    process_oops();
+    process_narrowOops();
+
     _closure_app_seconds += (os::elapsedTime() - start);
   }
 
-  template <class T> inline void do_oop_work(T* p) {
-    if (_buffer_curr == _buffer_top) {
+  void process_buffer_if_full() {
+    if (is_buffer_full()) {
       process_buffer();
     }
-    StarTask new_ref(p);
-    *_buffer_curr = new_ref;
-    ++_buffer_curr;
+  }
+
+  void add_narrowOop(narrowOop* p) {
+    assert(!is_buffer_full(), "Buffer should not be full");
+    *_narrowOop_bottom = (OopOrNarrowOopStar)p;
+    _narrowOop_bottom--;
+  }
+
+  void add_oop(oop* p) {
+    assert(!is_buffer_full(), "Buffer should not be full");
+    *_oop_top = (OopOrNarrowOopStar)p;
+    _oop_top++;
   }
 
 public:
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(oop* p)       { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) {
+    process_buffer_if_full();
+    add_narrowOop(p);
+  }
 
-  void done () {
-    if (_buffer_curr > _buffer) {
+  virtual void do_oop(oop* p)       {
+    process_buffer_if_full();
+    add_oop(p);
+  }
+
+  void done() {
+    if (!is_buffer_empty()) {
       process_buffer();
     }
   }
-  double closure_app_seconds () {
+
+  double closure_app_seconds() {
     return _closure_app_seconds;
   }
-  BufferingOopClosure (OopClosure *oc) :
+
+  BufferingOopClosure(OopClosure *oc) :
     _oc(oc),
-    _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
+    _oop_top(_buffer),
+    _narrowOop_bottom(_buffer + BufferLength - 1),
     _closure_app_seconds(0.0) { }
 };
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,7 +29,7 @@
 #include "gc_implementation/g1/g1HotCardCache.hpp"
 #include "runtime/java.hpp"
 
-ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h) :
+ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h, CardTableEntryClosure* refine_closure) :
   _threads(NULL), _n_threads(0),
   _hot_card_cache(g1h)
 {
@@ -61,7 +61,7 @@
 
   ConcurrentG1RefineThread *next = NULL;
   for (uint i = _n_threads - 1; i != UINT_MAX; i--) {
-    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
+    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, refine_closure, worker_id_offset, i);
     assert(t != NULL, "Conc refine should have been created");
     if (t->osthread() == NULL) {
         vm_shutdown_during_initialization("Could not create ConcurrentG1RefineThread");
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -71,7 +71,7 @@
   void reset_threshold_step();
 
  public:
-  ConcurrentG1Refine(G1CollectedHeap* g1h);
+  ConcurrentG1Refine(G1CollectedHeap* g1h, CardTableEntryClosure* refine_closure);
   ~ConcurrentG1Refine();
 
   void init(); // Accomplish some initialization that has to wait.
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,8 +33,10 @@
 
 ConcurrentG1RefineThread::
 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
+                         CardTableEntryClosure* refine_closure,
                          uint worker_id_offset, uint worker_id) :
   ConcurrentGCThread(),
+  _refine_closure(refine_closure),
   _worker_id_offset(worker_id_offset),
   _worker_id(worker_id),
   _active(false),
@@ -71,6 +73,7 @@
 }
 
 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
+  SuspendibleThreadSetJoiner sts;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
   if (g1p->adaptive_young_list_length()) {
@@ -82,8 +85,8 @@
 
       // we try to yield every time we visit 10 regions
       if (regions_visited == 10) {
-        if (_sts.should_yield()) {
-          _sts.yield("G1 refine");
+        if (sts.should_yield()) {
+          sts.yield();
           // we just abandon the iteration
           break;
         }
@@ -99,9 +102,7 @@
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   _vtime_start = os::elapsedVTime();
   while(!_should_terminate) {
-    _sts.join();
     sample_young_list_rs_lengths();
-    _sts.leave();
 
     if (os::supports_vtime()) {
       _vtime_accum = (os::elapsedVTime() - _vtime_start);
@@ -182,37 +183,37 @@
       break;
     }
 
-    _sts.join();
+    {
+      SuspendibleThreadSetJoiner sts;
+
+      do {
+        int curr_buffer_num = (int)dcqs.completed_buffers_num();
+        // If the number of the buffers falls down into the yellow zone,
+        // that means that the transition period after the evacuation pause has ended.
+        if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) {
+          dcqs.set_completed_queue_padding(0);
+        }
 
-    do {
-      int curr_buffer_num = (int)dcqs.completed_buffers_num();
-      // If the number of the buffers falls down into the yellow zone,
-      // that means that the transition period after the evacuation pause has ended.
-      if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) {
-        dcqs.set_completed_queue_padding(0);
-      }
+        if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) {
+          // If the number of the buffer has fallen below our threshold
+          // we should deactivate. The predecessor will reactivate this
+          // thread should the number of the buffers cross the threshold again.
+          deactivate();
+          break;
+        }
 
-      if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) {
-        // If the number of the buffer has fallen below our threshold
-        // we should deactivate. The predecessor will reactivate this
-        // thread should the number of the buffers cross the threshold again.
+        // Check if we need to activate the next thread.
+        if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
+          _next->activate();
+        }
+      } while (dcqs.apply_closure_to_completed_buffer(_refine_closure, _worker_id + _worker_id_offset, cg1r()->green_zone()));
+
+      // We can exit the loop above while being active if there was a yield request.
+      if (is_active()) {
         deactivate();
-        break;
       }
-
-      // Check if we need to activate the next thread.
-      if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
-        _next->activate();
-      }
-    } while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, cg1r()->green_zone()));
-
-    // We can exit the loop above while being active if there was a yield request.
-    if (is_active()) {
-      deactivate();
     }
 
-    _sts.leave();
-
     if (os::supports_vtime()) {
       _vtime_accum = (os::elapsedVTime() - _vtime_start);
     } else {
@@ -223,17 +224,6 @@
   terminate();
 }
 
-
-void ConcurrentG1RefineThread::yield() {
-  if (G1TraceConcRefinement) {
-    gclog_or_tty->print_cr("G1-Refine-yield");
-  }
-  _sts.yield("G1 refine");
-  if (G1TraceConcRefinement) {
-    gclog_or_tty->print_cr("G1-Refine-yield-end");
-  }
-}
-
 void ConcurrentG1RefineThread::stop() {
   // it is ok to take late safepoints here, if needed
   {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "gc_implementation/shared/concurrentGCThread.hpp"
 
 // Forward Decl.
+class CardTableEntryClosure;
 class ConcurrentG1Refine;
 
 // The G1 Concurrent Refinement Thread (could be several in the future).
@@ -49,6 +50,9 @@
   Monitor* _monitor;
   ConcurrentG1Refine* _cg1r;
 
+  // The closure applied to completed log buffers.
+  CardTableEntryClosure* _refine_closure;
+
   int _thread_threshold_step;
   // This thread activation threshold
   int _threshold;
@@ -64,13 +68,11 @@
   void activate();
   void deactivate();
 
-  // For use by G1CollectedHeap, which is a friend.
-  static SuspendibleThreadSet* sts() { return &_sts; }
-
 public:
   virtual void run();
   // Constructor
   ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
+                           CardTableEntryClosure* refine_closure,
                            uint worker_id_offset, uint worker_id);
 
   void initialize();
@@ -84,8 +86,6 @@
 
   ConcurrentG1Refine* cg1r() { return _cg1r;     }
 
-  // Yield for GC
-  void yield();
   // shutdown
   void stop();
 };
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentMark.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/symbolTable.hpp"
+#include "code/codeCache.hpp"
 #include "gc_implementation/g1/concurrentMark.inline.hpp"
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
@@ -39,12 +40,14 @@
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
+#include "memory/allocation.hpp"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/prefetch.inline.hpp"
 #include "services/memTracker.hpp"
 
 // Concurrent marking bit map wrapper
@@ -56,8 +59,8 @@
   _bmWordSize = 0;
 }
 
-HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
-                                               HeapWord* limit) const {
+HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
+                                               const HeapWord* limit) const {
   // First we must round addr *up* to a possible object boundary.
   addr = (HeapWord*)align_size_up((intptr_t)addr,
                                   HeapWordSize << _shifter);
@@ -74,8 +77,8 @@
   return nextAddr;
 }
 
-HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
-                                                 HeapWord* limit) const {
+HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
+                                                 const HeapWord* limit) const {
   size_t addrOffset = heapWordToOffset(addr);
   if (limit == NULL) {
     limit = _bmStartWord + _bmWordSize;
@@ -126,7 +129,7 @@
   }
   assert(_virtual_space.committed_size() == brs.size(),
          "didn't reserve backing store for all of concurrent marking bit map?");
-  _bm.set_map((uintptr_t*)_virtual_space.low());
+  _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
          _bmWordSize, "inconsistency in bit map sizing");
   _bm.set_size(_bmWordSize >> _shifter);
@@ -510,6 +513,7 @@
   _has_overflown(false),
   _concurrent(false),
   _has_aborted(false),
+  _aborted_gc_id(GCId::undefined()),
   _restart_for_overflow(false),
   _concurrent_marking_in_progress(false),
 
@@ -886,6 +890,10 @@
   guarantee(!g1h->mark_in_progress(), "invariant");
 }
 
+bool ConcurrentMark::nextMarkBitmapIsClear() {
+  return _nextMarkBitMap->getNextMarkedWordAddress(_heap_start, _heap_end) == _heap_end;
+}
+
 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
   bool doHeapRegion(HeapRegion* r) {
@@ -976,13 +984,13 @@
   }
 
   if (concurrent()) {
-    ConcurrentGCThread::stsLeave();
+    SuspendibleThreadSet::leave();
   }
 
   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
 
   if (concurrent()) {
-    ConcurrentGCThread::stsJoin();
+    SuspendibleThreadSet::join();
   }
   // at this point everyone should have synced up and not be doing any
   // more work
@@ -1019,8 +1027,7 @@
       force_overflow()->update();
 
       if (G1Log::fine()) {
-        gclog_or_tty->date_stamp(PrintGCDateStamps);
-        gclog_or_tty->stamp(PrintGCTimeStamps);
+        gclog_or_tty->gclog_stamp(concurrent_gc_id());
         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
       }
     }
@@ -1036,13 +1043,13 @@
   }
 
   if (concurrent()) {
-    ConcurrentGCThread::stsLeave();
+    SuspendibleThreadSet::leave();
   }
 
   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
 
   if (concurrent()) {
-    ConcurrentGCThread::stsJoin();
+    SuspendibleThreadSet::join();
   }
   // at this point everything should be re-initialized and ready to go
 
@@ -1094,7 +1101,7 @@
 
     double start_vtime = os::elapsedVTime();
 
-    ConcurrentGCThread::stsJoin();
+    SuspendibleThreadSet::join();
 
     assert(worker_id < _cm->active_tasks(), "invariant");
     CMTask* the_task = _cm->task(worker_id);
@@ -1121,9 +1128,9 @@
         if (!_cm->has_aborted() && the_task->has_aborted()) {
           sleep_time_ms =
             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
-          ConcurrentGCThread::stsLeave();
+          SuspendibleThreadSet::leave();
           os::sleep(Thread::current(), sleep_time_ms, false);
-          ConcurrentGCThread::stsJoin();
+          SuspendibleThreadSet::join();
         }
         double end_time2_sec = os::elapsedTime();
         double elapsed_time2_sec = end_time2_sec - start_time_sec;
@@ -1141,7 +1148,7 @@
     the_task->record_end_time();
     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 
-    ConcurrentGCThread::stsLeave();
+    SuspendibleThreadSet::leave();
 
     double end_vtime = os::elapsedVTime();
     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
@@ -1221,6 +1228,9 @@
 };
 
 void ConcurrentMark::scanRootRegions() {
+  // Start of concurrent marking.
+  ClassLoaderDataGraph::clear_claimed_marks();
+
   // scan_in_progress() will have been set to true only if there was
   // at least one root region to scan. So, if it's false, we
   // should not attempt to do any further work.
@@ -1269,7 +1279,7 @@
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (use_parallel_marking_threads()) {
     _parallel_workers->set_active_workers((int)active_workers);
-    // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
+    // Don't set _n_par_threads because it affects MT in process_roots()
     // and the decisions on that MT processing is made elsewhere.
     assert(_parallel_workers->active_workers() > 0, "Should have been set");
     _parallel_workers->run_task(&markingTask);
@@ -1300,6 +1310,7 @@
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(before)");
   }
+  g1h->check_bitmaps("Remark Start");
 
   G1CollectorPolicy* g1p = g1h->g1_policy();
   g1p->record_concurrent_mark_remark_start();
@@ -1348,6 +1359,7 @@
       Universe::verify(VerifyOption_G1UseNextMarking,
                        " VerifyDuringGC:(after)");
     }
+    g1h->check_bitmaps("Remark End");
     assert(!restart_for_overflow(), "sanity");
     // Completely reset the marking state since marking completed
     set_non_marking_state();
@@ -1997,6 +2009,7 @@
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(before)");
   }
+  g1h->check_bitmaps("Cleanup Start");
 
   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   g1p->record_concurrent_mark_cleanup_start();
@@ -2034,8 +2047,8 @@
     // that calculated by walking the marking bitmap.
 
     // Bitmaps to hold expected values
-    BitMap expected_region_bm(_region_bm.size(), false);
-    BitMap expected_card_bm(_card_bm.size(), false);
+    BitMap expected_region_bm(_region_bm.size(), true);
+    BitMap expected_card_bm(_card_bm.size(), true);
 
     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                  &_region_bm,
@@ -2137,22 +2150,30 @@
   // Update the soft reference policy with the new heap occupancy.
   Universe::update_heap_info_at_gc();
 
-  // We need to make this be a "collection" so any collection pause that
-  // races with it goes around and waits for completeCleanup to finish.
-  g1h->increment_total_collections();
-
-  // We reclaimed old regions so we should calculate the sizes to make
-  // sure we update the old gen/space data.
-  g1h->g1mm()->update_sizes();
-
   if (VerifyDuringGC) {
     HandleMark hm;  // handle scope
     Universe::heap()->prepare_for_verify();
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(after)");
   }
+  g1h->check_bitmaps("Cleanup End");
 
   g1h->verify_region_sets_optional();
+
+  // We need to make this be a "collection" so any collection pause that
+  // races with it goes around and waits for completeCleanup to finish.
+  g1h->increment_total_collections();
+
+  // Clean out dead classes and update Metaspace sizes.
+  if (ClassUnloadingWithConcurrentMark) {
+    ClassLoaderDataGraph::purge();
+  }
+  MetaspaceGC::compute_new_size();
+
+  // We reclaimed old regions so we should calculate the sizes to make
+  // sure we update the old gen/space data.
+  g1h->g1mm()->update_sizes();
+
   g1h->trace_heap_after_concurrent_cycle();
 }
 
@@ -2382,6 +2403,8 @@
   }
 
   virtual void work(uint worker_id) {
+    ResourceMark rm;
+    HandleMark hm;
     CMTask* task = _cm->task(worker_id);
     G1CMIsAliveClosure g1_is_alive(_g1h);
     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
@@ -2439,6 +2462,26 @@
   _g1h->set_par_threads(0);
 }
 
+void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {  // Thin forwarder to the heap's parallel_cleaning().
+  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);  // NOTE(review): meaning of the two 'true' flags is not visible here; confirm against parallel_cleaning()'s declaration.
+}
+
+// GCTraceTime wrapper for remark sub-phases: writes a ' ' separator to the GC log first when tracing is enabled.
+class G1RemarkGCTraceTime : public GCTraceTime {
+  static bool doit_and_prepend(bool doit) {  // Returns 'doit' unchanged; evaluated while building the base-class arguments.
+    if (doit) {
+      gclog_or_tty->put(' ');  // Written before the GCTraceTime base is constructed.
+    }
+    return doit;
+  }
+
+ public:
+  G1RemarkGCTraceTime(const char* title, bool doit)
+    : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
+        G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {  // Uses the concurrent-mark timer and GC id.
+  }
+};
+
 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
   if (has_overflown()) {
     // Skip processing the discovered references if we have
@@ -2464,7 +2507,7 @@
     if (G1Log::finer()) {
       gclog_or_tty->put(' ');
     }
-    GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
+    GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
 
     ReferenceProcessor* rp = g1h->ref_processor_cm();
 
@@ -2521,7 +2564,8 @@
                                           &g1_keep_alive,
                                           &g1_drain_mark_stack,
                                           executor,
-                                          g1h->gc_timer_cm());
+                                          g1h->gc_timer_cm(),
+                                          concurrent_gc_id());
     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
 
     // The do_oop work routines of the keep_alive and drain_marking_stack
@@ -2550,9 +2594,31 @@
     return;
   }
 
-  g1h->unlink_string_and_symbol_table(&g1_is_alive,
-                                      /* process_strings */ false, // currently strings are always roots
-                                      /* process_symbols */ true);
+  assert(_markStack.isEmpty(), "Marking should have completed");
+
+  // Unload Klasses, String, Symbols, Code Cache, etc.
+  {
+    G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
+
+    if (ClassUnloadingWithConcurrentMark) {
+      bool purged_classes;
+
+      {
+        G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
+        purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
+      }
+
+      {
+        G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
+        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
+      }
+    }
+
+    if (G1StringDedup::is_enabled()) {
+      G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
+      G1StringDedup::unlink(&g1_is_alive);
+    }
+  }
 }
 
 void ConcurrentMark::swapMarkBitMaps() {
@@ -2561,6 +2627,57 @@
   _nextMarkBitMap  = (CMBitMap*)  temp;
 }
 
+class CMObjectClosure;
+
+// Closure for iterating over objects, currently only used for
+// processing SATB buffers.
+class CMObjectClosure : public ObjectClosure {
+private:
+  CMTask* _task;  // Marking task that receives every visited object.
+
+public:
+  void do_object(oop obj) {  // Hands 'obj' to the task's deal_with_reference().
+    _task->deal_with_reference(obj);
+  }
+
+  CMObjectClosure(CMTask* task) : _task(task) { }
+};
+
+class G1RemarkThreadsClosure : public ThreadClosure {  // Per-thread remark work: nmethod walk plus SATB queue processing.
+  CMObjectClosure _cm_obj;  // Applied to SATB queue entries.
+  G1CMOopClosure _cm_cl;  // Oop closure wrapped by _code_cl.
+  MarkingCodeBlobClosure _code_cl;  // Applied to each claimed Java thread's nmethods.
+  int _thread_parity;  // strong_roots_parity() sampled at construction; passed to claim_oops_do().
+  bool _is_par;  // Forwarded to claim_oops_do().
+
+ public:
+  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
+    _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
+    _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
+
+  void do_thread(Thread* thread) {  // Does nothing for threads that are neither Java threads nor the VM thread.
+    if (thread->is_Java_thread()) {
+      if (thread->claim_oops_do(_is_par, _thread_parity)) {  // Skip the thread when the claim fails.
+        JavaThread* jt = (JavaThread*)thread;
+
+        // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
+        // however, oops reachable from nmethods have very complex lifecycles:
+        // * Alive if on the stack of an executing method
+        // * Weakly reachable otherwise
+        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
+        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
+        jt->nmethods_do(&_code_cl);
+
+        jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);  // Then process this thread's own SATB queue.
+      }
+    } else if (thread->is_VM_thread()) {  // The shared SATB queue is handled under the VM thread's claim.
+      if (thread->claim_oops_do(_is_par, _thread_parity)) {
+        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
+      }
+    }
+  }
+};
+
 class CMRemarkTask: public AbstractGangTask {
 private:
   ConcurrentMark* _cm;
@@ -2572,6 +2689,14 @@
     if (worker_id < _cm->active_tasks()) {
       CMTask* task = _cm->task(worker_id);
       task->record_start_time();
+      {
+        ResourceMark rm;
+        HandleMark hm;
+
+        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
+        Threads::threads_do(&threads_f);
+      }
+
       do {
         task->do_marking_step(1000000000.0 /* something very large */,
                               true         /* do_termination       */,
@@ -2594,6 +2719,8 @@
   HandleMark   hm;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
+  G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
+
   g1h->ensure_parsability(false);
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
@@ -3241,8 +3368,14 @@
 
 // abandon current marking iteration due to a Full GC
 void ConcurrentMark::abort() {
-  // Clear all marks to force marking thread to do nothing
+  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
+  // concurrent bitmap clearing.
   _nextMarkBitMap->clearAll();
+
+  // Note we cannot clear the previous marking bitmap here
+  // since VerifyDuringGC verifies the objects marked during
+  // a full GC against the previous bitmap.
+
   // Clear the liveness counting data
   clear_all_count_data();
   // Empty mark stack
@@ -3252,6 +3385,12 @@
   }
   _first_overflow_barrier_sync.abort();
   _second_overflow_barrier_sync.abort();
+  const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
+  if (!gc_id.is_undefined()) {
+    // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
+    // to detect that it was aborted. Only keep track of the first GC id that we aborted.
+    _aborted_gc_id = gc_id;
+   }
   _has_aborted = true;
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
@@ -3266,6 +3405,13 @@
   _g1h->register_concurrent_cycle_end();
 }
 
+const GCId& ConcurrentMark::concurrent_gc_id() {  // GC id used when stamping concurrent-cycle log output.
+  if (has_aborted()) {
+    return _aborted_gc_id;  // Id saved by abort() rather than the tracer's current one.
+  }
+  return _g1h->gc_tracer_cm()->gc_id();  // Not aborted: use the concurrent-mark tracer's id.
+}
+
 static void print_ms_time_info(const char* prefix, const char* name,
                                NumberSeq& ns) {
   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
@@ -3322,21 +3468,17 @@
 
 // We take a break if someone is trying to stop the world.
 bool ConcurrentMark::do_yield_check(uint worker_id) {
-  if (should_yield()) {
+  if (SuspendibleThreadSet::should_yield()) {
     if (worker_id == 0) {
       _g1h->g1_policy()->record_concurrent_pause();
     }
-    cmThread()->yield();
+    SuspendibleThreadSet::yield();
     return true;
   } else {
     return false;
   }
 }
 
-bool ConcurrentMark::should_yield() {
-  return cmThread()->should_yield();
-}
-
 bool ConcurrentMark::containing_card_is_marked(void* p) {
   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
@@ -3410,20 +3552,6 @@
   }
 };
 
-// Closure for iterating over objects, currently only used for
-// processing SATB buffers.
-class CMObjectClosure : public ObjectClosure {
-private:
-  CMTask* _task;
-
-public:
-  void do_object(oop obj) {
-    _task->deal_with_reference(obj);
-  }
-
-  CMObjectClosure(CMTask* task) : _task(task) { }
-};
-
 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                                ConcurrentMark* cm,
                                CMTask* task)
@@ -3625,7 +3753,7 @@
 #endif // _MARKING_STATS_
 
   // (4) We check whether we should yield. If we have to, then we abort.
-  if (_cm->should_yield()) {
+  if (SuspendibleThreadSet::should_yield()) {
     // We should yield. To do this we abort the task. The caller is
     // responsible for yielding.
     set_has_aborted();
@@ -3889,15 +4017,6 @@
     }
   }
 
-  if (!concurrent() && !has_aborted()) {
-    // We should only do this during remark.
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      satb_mq_set.par_iterate_closure_all_threads(_worker_id);
-    } else {
-      satb_mq_set.iterate_closure_all_threads();
-    }
-  }
-
   _draining_satb_buffers = false;
 
   assert(has_aborted() ||
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentMark.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,7 +25,9 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 
+#include "classfile/javaClasses.hpp"
 #include "gc_implementation/g1/heapRegionSet.hpp"
+#include "gc_implementation/shared/gcId.hpp"
 #include "utilities/taskqueue.hpp"
 
 class G1CollectedHeap;
@@ -85,19 +87,19 @@
   // Return the address corresponding to the next marked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
   // such bit, returns "limit" if that is non-NULL, or else "endWord()".
-  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
-                                     HeapWord* limit = NULL) const;
+  HeapWord* getNextMarkedWordAddress(const HeapWord* addr,
+                                     const HeapWord* limit = NULL) const;
   // Return the address corresponding to the next unmarked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
   // such bit, returns "limit" if that is non-NULL, or else "endWord()".
-  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
-                                       HeapWord* limit = NULL) const;
+  HeapWord* getNextUnmarkedWordAddress(const HeapWord* addr,
+                                       const HeapWord* limit = NULL) const;
 
   // conversion utilities
   HeapWord* offsetToHeapWord(size_t offset) const {
     return _bmStartWord + (offset << _shifter);
   }
-  size_t heapWordToOffset(HeapWord* addr) const {
+  size_t heapWordToOffset(const HeapWord* addr) const {
     return pointer_delta(addr, _bmStartWord) >> _shifter;
   }
   int heapWordDiffToOffsetDiff(size_t diff) const;
@@ -444,6 +446,7 @@
   volatile bool           _concurrent;
   // set at the end of a Full GC so that marking aborts
   volatile bool           _has_aborted;
+  GCId                    _aborted_gc_id;
 
   // used when remark aborts due to an overflow to indicate that
   // another concurrent marking phase should start
@@ -474,6 +477,7 @@
   ForceOverflowSettings _force_overflow_conc;
   ForceOverflowSettings _force_overflow_stw;
 
+  void weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes);
   void weakRefsWork(bool clear_all_soft_refs);
 
   void swapMarkBitMaps();
@@ -732,6 +736,9 @@
   // Clear the next marking bitmap (will be called concurrently).
   void clearNextBitmap();
 
+  // Return whether the next mark bitmap has no marks set.
+  bool nextMarkBitmapIsClear();
+
   // These two do the work that needs to be done before and after the
   // initial root checkpoint. Since this checkpoint can be done at two
   // different points (i.e. an explicit pause or piggy-backed on a
@@ -818,13 +825,14 @@
   }
 
   inline bool do_yield_check(uint worker_i = 0);
-  inline bool should_yield();
 
   // Called to abort the marking cycle after a Full GC takes palce.
   void abort();
 
   bool has_aborted()      { return _has_aborted; }
 
+  const GCId& concurrent_gc_id();
+
   // This prints the global/local fingers. It is used for debugging.
   NOT_PRODUCT(void print_finger();)
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,4 +1,4 @@
-/*
+ /*
  * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -29,6 +29,7 @@
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MMUTracker.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
+#include "gc_implementation/shared/gcTrace.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/vmThread.hpp"
 
@@ -109,8 +110,7 @@
       double scan_start = os::elapsedTime();
       if (!cm()->has_aborted()) {
         if (G1Log::fine()) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
           gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
         }
 
@@ -118,8 +118,7 @@
 
         double scan_end = os::elapsedTime();
         if (G1Log::fine()) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
           gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]",
                                  scan_end - scan_start);
         }
@@ -127,8 +126,7 @@
 
       double mark_start_sec = os::elapsedTime();
       if (G1Log::fine()) {
-        gclog_or_tty->date_stamp(PrintGCDateStamps);
-        gclog_or_tty->stamp(PrintGCTimeStamps);
+        gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
         gclog_or_tty->print_cr("[GC concurrent-mark-start]");
       }
 
@@ -151,8 +149,7 @@
           }
 
           if (G1Log::fine()) {
-            gclog_or_tty->date_stamp(PrintGCDateStamps);
-            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
             gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf secs]",
                                       mark_end_sec - mark_start_sec);
           }
@@ -167,8 +164,7 @@
                                    "in remark (restart #%d).", iter);
           }
           if (G1Log::fine()) {
-            gclog_or_tty->date_stamp(PrintGCDateStamps);
-            gclog_or_tty->stamp(PrintGCTimeStamps);
+            gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
             gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
           }
         }
@@ -194,9 +190,8 @@
       } else {
         // We don't want to update the marking status if a GC pause
         // is already underway.
-        _sts.join();
+        SuspendibleThreadSetJoiner sts;
         g1h->set_marking_complete();
-        _sts.leave();
       }
 
       // Check if cleanup set the free_regions_coming flag. If it
@@ -212,8 +207,7 @@
 
         double cleanup_start_sec = os::elapsedTime();
         if (G1Log::fine()) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
           gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
         }
 
@@ -233,8 +227,7 @@
 
         double cleanup_end_sec = os::elapsedTime();
         if (G1Log::fine()) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
           gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf secs]",
                                  cleanup_end_sec - cleanup_start_sec);
         }
@@ -266,46 +259,47 @@
       // record_concurrent_mark_cleanup_completed() (and, in fact, it's
       // not needed any more as the concurrent mark state has been
       // already reset).
-      _sts.join();
-      if (!cm()->has_aborted()) {
-        g1_policy->record_concurrent_mark_cleanup_completed();
+      {
+        SuspendibleThreadSetJoiner sts;
+        if (!cm()->has_aborted()) {
+          g1_policy->record_concurrent_mark_cleanup_completed();
+        }
       }
-      _sts.leave();
 
       if (cm()->has_aborted()) {
         if (G1Log::fine()) {
-          gclog_or_tty->date_stamp(PrintGCDateStamps);
-          gclog_or_tty->stamp(PrintGCTimeStamps);
+          gclog_or_tty->gclog_stamp(cm()->concurrent_gc_id());
           gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
         }
       }
 
       // We now want to allow clearing of the marking bitmap to be
       // suspended by a collection pause.
-      _sts.join();
-      _cm->clearNextBitmap();
-      _sts.leave();
+      // We may have aborted just before the remark. Do not bother clearing the
+      // bitmap then, as it has been done during mark abort.
+      if (!cm()->has_aborted()) {
+        SuspendibleThreadSetJoiner sts;
+        _cm->clearNextBitmap();
+      } else {
+        assert(!G1VerifyBitmaps || _cm->nextMarkBitmapIsClear(), "Next mark bitmap must be clear");
+      }
     }
 
     // Update the number of full collections that have been
     // completed. This will also notify the FullGCCount_lock in case a
     // Java thread is waiting for a full GC to happen (e.g., it
     // called System.gc() with +ExplicitGCInvokesConcurrent).
-    _sts.join();
-    g1h->increment_old_marking_cycles_completed(true /* concurrent */);
-    g1h->register_concurrent_cycle_end();
-    _sts.leave();
+    {
+      SuspendibleThreadSetJoiner sts;
+      g1h->increment_old_marking_cycles_completed(true /* concurrent */);
+      g1h->register_concurrent_cycle_end();
+    }
   }
   assert(_should_terminate, "just checking");
 
   terminate();
 }
 
-
-void ConcurrentMarkThread::yield() {
-  _sts.yield("Concurrent Mark");
-}
-
 void ConcurrentMarkThread::stop() {
   {
     MutexLockerEx ml(Terminator_lock);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -89,9 +89,6 @@
   // that started() is set and set in_progress().
   bool during_cycle()      { return started() || in_progress(); }
 
-  // Yield for GC
-  void            yield();
-
   // shutdown
   void stop();
 };
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -70,7 +70,7 @@
 
 DirtyCardQueueSet::DirtyCardQueueSet(bool notify_when_complete) :
   PtrQueueSet(notify_when_complete),
-  _closure(NULL),
+  _mut_process_closure(NULL),
   _shared_dirty_card_queue(this, true /*perm*/),
   _free_ids(NULL),
   _processed_buffers_mut(0), _processed_buffers_rs_thread(0)
@@ -83,10 +83,11 @@
   return (uint)os::processor_count();
 }
 
-void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+void DirtyCardQueueSet::initialize(CardTableEntryClosure* cl, Monitor* cbl_mon, Mutex* fl_lock,
                                    int process_completed_threshold,
                                    int max_completed_queue,
                                    Mutex* lock, PtrQueueSet* fl_owner) {
+  _mut_process_closure = cl;
   PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold,
                           max_completed_queue, fl_owner);
   set_buffer_size(G1UpdateBufferSize);
@@ -98,18 +99,15 @@
   t->dirty_card_queue().handle_zero_index();
 }
 
-void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) {
-  _closure = closure;
-}
-
-void DirtyCardQueueSet::iterate_closure_all_threads(bool consume,
+void DirtyCardQueueSet::iterate_closure_all_threads(CardTableEntryClosure* cl,
+                                                    bool consume,
                                                     uint worker_i) {
   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    bool b = t->dirty_card_queue().apply_closure(_closure, consume);
+    bool b = t->dirty_card_queue().apply_closure(cl, consume);
     guarantee(b, "Should not be interrupted.");
   }
-  bool b = shared_dirty_card_queue()->apply_closure(_closure,
+  bool b = shared_dirty_card_queue()->apply_closure(cl,
                                                     consume,
                                                     worker_i);
   guarantee(b, "Should not be interrupted.");
@@ -143,7 +141,7 @@
 
   bool b = false;
   if (worker_i != UINT_MAX) {
-    b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0,
+    b = DirtyCardQueue::apply_closure_to_buffer(_mut_process_closure, buf, 0,
                                                 _sz, true, worker_i);
     if (b) Atomic::inc(&_processed_buffers_mut);
 
@@ -218,18 +216,11 @@
   return res;
 }
 
-bool DirtyCardQueueSet::apply_closure_to_completed_buffer(uint worker_i,
-                                                          int stop_at,
-                                                          bool during_pause) {
-  return apply_closure_to_completed_buffer(_closure, worker_i,
-                                           stop_at, during_pause);
-}
-
-void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
+void DirtyCardQueueSet::apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl) {
   BufferNode* nd = _completed_buffers_head;
   while (nd != NULL) {
     bool b =
-      DirtyCardQueue::apply_closure_to_buffer(_closure,
+      DirtyCardQueue::apply_closure_to_buffer(cl,
                                               BufferNode::make_buffer_from_node(nd),
                                               0, _sz, false);
     guarantee(b, "Should not stop early.");
@@ -237,6 +228,24 @@
   }
 }
 
+void DirtyCardQueueSet::par_apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl) {  // Callers claim buffers one at a time through a shared cursor.
+  BufferNode* nd = _cur_par_buffer_node;  // NOTE(review): appears to be set only by reset_for_par_iteration(); confirm it is called first.
+  while (nd != NULL) {
+    BufferNode* next = (BufferNode*)nd->next();
+    BufferNode* actual = (BufferNode*)Atomic::cmpxchg_ptr((void*)next, (volatile void*)&_cur_par_buffer_node, (void*)nd);  // Try to advance the cursor from nd to next.
+    if (actual == nd) {  // The exchange succeeded, so this caller processes nd.
+      bool b =
+        DirtyCardQueue::apply_closure_to_buffer(cl,
+                                                BufferNode::make_buffer_from_node(actual),
+                                                0, _sz, false);
+      guarantee(b, "Should not stop early.");
+      nd = next;
+    } else {
+      nd = actual;  // The cursor had already moved; retry from the value that was observed.
+    }
+  }
+}
+
 // Deallocates any completed log buffers
 void DirtyCardQueueSet::clear() {
   BufferNode* buffers_to_delete = NULL;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -73,7 +73,8 @@
 
 
 class DirtyCardQueueSet: public PtrQueueSet {
-  CardTableEntryClosure* _closure;
+  // The closure used in mut_process_buffer().
+  CardTableEntryClosure* _mut_process_closure;
 
   DirtyCardQueue _shared_dirty_card_queue;
 
@@ -88,10 +89,12 @@
   jint _processed_buffers_mut;
   jint _processed_buffers_rs_thread;
 
+  // Current buffer node used for parallel iteration.
+  BufferNode* volatile _cur_par_buffer_node;
 public:
   DirtyCardQueueSet(bool notify_when_complete = true);
 
-  void initialize(Monitor* cbl_mon, Mutex* fl_lock,
+  void initialize(CardTableEntryClosure* cl, Monitor* cbl_mon, Mutex* fl_lock,
                   int process_completed_threshold,
                   int max_completed_queue,
                   Mutex* lock, PtrQueueSet* fl_owner = NULL);
@@ -102,33 +105,15 @@
 
   static void handle_zero_index_for_thread(JavaThread* t);
 
-  // Register "blk" as "the closure" for all queues.  Only one such closure
-  // is allowed.  The "apply_closure_to_completed_buffer" method will apply
-  // this closure to a completed buffer, and "iterate_closure_all_threads"
-  // applies it to partially-filled buffers (the latter should only be done
-  // with the world stopped).
-  void set_closure(CardTableEntryClosure* closure);
-
-  // If there is a registered closure for buffers, apply it to all entries
-  // in all currently-active buffers.  This should only be applied at a
-  // safepoint.  (Currently must not be called in parallel; this should
-  // change in the future.)  If "consume" is true, processed entries are
-  // discarded.
-  void iterate_closure_all_threads(bool consume = true,
+  // Apply the given closure to all entries in all currently-active buffers.
+  // This should only be applied at a safepoint. (Currently must not be called
+  // in parallel; this should change in the future.)  If "consume" is true,
+  // processed entries are discarded.
+  void iterate_closure_all_threads(CardTableEntryClosure* cl,
+                                   bool consume = true,
                                    uint worker_i = 0);
 
   // If there exists some completed buffer, pop it, then apply the
-  // registered closure to all its elements, nulling out those elements
-  // processed.  If all elements are processed, returns "true".  If no
-  // completed buffers exist, returns false.  If a completed buffer exists,
-  // but is only partially completed before a "yield" happens, the
-  // partially completed buffer (with its processed elements set to NULL)
-  // is returned to the completed buffer set, and this call returns false.
-  bool apply_closure_to_completed_buffer(uint worker_i = 0,
-                                         int stop_at = 0,
-                                         bool during_pause = false);
-
-  // If there exists some completed buffer, pop it, then apply the
   // specified closure to all its elements, nulling out those elements
   // processed.  If all elements are processed, returns "true".  If no
   // completed buffers exist, returns false.  If a completed buffer exists,
@@ -149,7 +134,12 @@
 
   // Applies the current closure to all completed buffers,
   // non-consumptively.
-  void apply_closure_to_all_completed_buffers();
+  void apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl);
+
+  void reset_for_par_iteration() { _cur_par_buffer_node = _completed_buffers_head; }
+  // Applies the given closure to all completed buffers, non-consumptively.
+  // Parallel version.
+  void par_apply_closure_to_all_completed_buffers(CardTableEntryClosure* cl);
 
   DirtyCardQueue* shared_dirty_card_queue() {
     return &_shared_dirty_card_queue;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1AllocRegion.cpp
--- a/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 
 G1CollectedHeap* G1AllocRegion::_g1h = NULL;
 HeapRegion* G1AllocRegion::_dummy_region = NULL;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp
--- a/src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
 
 #include "gc_implementation/g1/g1AllocRegion.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 
 inline HeapWord* G1AllocRegion::allocate(HeapRegion* alloc_region,
                                          size_t word_size,
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
 #include "memory/space.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
@@ -98,6 +99,20 @@
   return (delta & right_n_bits(LogN_words)) == (size_t)NoBits;
 }
 
+void G1BlockOffsetSharedArray::set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
+  check_index(index_for(right - 1), "right address out of range");
+  assert(left  < right, "Heap addresses out of order");
+  size_t num_cards = pointer_delta(right, left) >> LogN_words;
+  if (UseMemSetInBOT) {
+    memset(&_offset_array[index_for(left)], offset, num_cards);
+  } else {
+    size_t i = index_for(left);
+    const size_t end = i + num_cards;
+    for (; i < end; i++) {
+      _offset_array[i] = offset;
+    }
+  }
+}
 
 //////////////////////////////////////////////////////////////////////
 // G1BlockOffsetArray
@@ -107,7 +122,7 @@
                                        MemRegion mr, bool init_to_zero) :
   G1BlockOffsetTable(mr.start(), mr.end()),
   _unallocated_block(_bottom),
-  _array(array), _csp(NULL),
+  _array(array), _gsp(NULL),
   _init_to_zero(init_to_zero) {
   assert(_bottom <= _end, "arguments out of order");
   if (!_init_to_zero) {
@@ -117,9 +132,8 @@
   }
 }
 
-void G1BlockOffsetArray::set_space(Space* sp) {
-  _sp = sp;
-  _csp = sp->toContiguousSpace();
+void G1BlockOffsetArray::set_space(G1OffsetTableContigSpace* sp) {
+  _gsp = sp;
 }
 
 // The arguments follow the normal convention of denoting
@@ -378,7 +392,7 @@
   }
   // Otherwise, find the block start using the table.
   HeapWord* q = block_at_or_preceding(addr, false, 0);
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   return forward_to_block_containing_addr_const(q, n, addr);
 }
 
@@ -406,31 +420,17 @@
          err_msg("next_boundary is beyond the end of the covered region "
                  " next_boundary " PTR_FORMAT " _array->_end " PTR_FORMAT,
                  next_boundary, _array->_end));
-  if (csp() != NULL) {
-    if (addr >= csp()->top()) return csp()->top();
-    while (next_boundary < addr) {
-      while (n <= next_boundary) {
-        q = n;
-        oop obj = oop(q);
-        if (obj->klass_or_null() == NULL) return q;
-        n += obj->size();
-      }
-      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
-      // [q, n) is the block that crosses the boundary.
-      alloc_block_work2(&next_boundary, &next_index, q, n);
+  if (addr >= gsp()->top()) return gsp()->top();
+  while (next_boundary < addr) {
+    while (n <= next_boundary) {
+      q = n;
+      oop obj = oop(q);
+      if (obj->klass_or_null() == NULL) return q;
+      n += block_size(q);
     }
-  } else {
-    while (next_boundary < addr) {
-      while (n <= next_boundary) {
-        q = n;
-        oop obj = oop(q);
-        if (obj->klass_or_null() == NULL) return q;
-        n += _sp->block_size(q);
-      }
-      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
-      // [q, n) is the block that crosses the boundary.
-      alloc_block_work2(&next_boundary, &next_index, q, n);
-    }
+    assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
+    // [q, n) is the block that crosses the boundary.
+    alloc_block_work2(&next_boundary, &next_index, q, n);
   }
   return forward_to_block_containing_addr_const(q, n, addr);
 }
@@ -638,7 +638,7 @@
   assert(_bottom <= addr && addr < _end,
          "addr must be covered by this Array");
   HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   return forward_to_block_containing_addr_const(q, n, addr);
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -52,8 +52,8 @@
 // consolidation.
 
 // Forward declarations
-class ContiguousSpace;
 class G1BlockOffsetSharedArray;
+class G1OffsetTableContigSpace;
 
 class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC {
   friend class VMStructs;
@@ -157,6 +157,8 @@
     return _offset_array[index];
   }
 
+  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset);
+
   void set_offset_array(size_t index, u_char offset) {
     check_index(index, "index out of range");
     check_offset(offset, "offset too large");
@@ -170,21 +172,6 @@
     _offset_array[index] = (u_char) pointer_delta(high, low);
   }
 
-  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
-    check_index(index_for(right - 1), "right address out of range");
-    assert(left  < right, "Heap addresses out of order");
-    size_t num_cards = pointer_delta(right, left) >> LogN_words;
-    if (UseMemSetInBOT) {
-      memset(&_offset_array[index_for(left)], offset, num_cards);
-    } else {
-      size_t i = index_for(left);
-      const size_t end = i + num_cards;
-      for (; i < end; i++) {
-        _offset_array[i] = offset;
-      }
-    }
-  }
-
   void set_offset_array(size_t left, size_t right, u_char offset) {
     check_index(right, "right index out of range");
     assert(left <= right, "indexes out of order");
@@ -281,11 +268,7 @@
   G1BlockOffsetSharedArray* _array;
 
   // The space that owns this subregion.
-  Space* _sp;
-
-  // If "_sp" is a contiguous space, the field below is the view of "_sp"
-  // as a contiguous space, else NULL.
-  ContiguousSpace* _csp;
+  G1OffsetTableContigSpace* _gsp;
 
   // If true, array entries are initialized to 0; otherwise, they are
   // initialized to point backwards to the beginning of the covered region.
@@ -310,7 +293,9 @@
 
 protected:
 
-  ContiguousSpace* csp() const { return _csp; }
+  G1OffsetTableContigSpace* gsp() const { return _gsp; }
+
+  inline size_t block_size(const HeapWord* p) const;
 
   // Returns the address of a block whose start is at most "addr".
   // If "has_max_index" is true, "assumes "max_index" is the last valid one
@@ -363,7 +348,7 @@
   // "this" to be passed as a parameter to a member constructor for
   // the containing concrete subtype of Space.
   // This would be legal C++, but MS VC++ doesn't allow it.
-  void set_space(Space* sp);
+  void set_space(G1OffsetTableContigSpace* sp);
 
   // Resets the covered region to the given "mr".
   void set_region(MemRegion mr);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,6 +26,8 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1BLOCKOFFSETTABLE_INLINE_HPP
 
 #include "gc_implementation/g1/g1BlockOffsetTable.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "memory/space.hpp"
 
 inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
@@ -69,6 +71,11 @@
   return result;
 }
 
+inline size_t
+G1BlockOffsetArray::block_size(const HeapWord* p) const {
+  return gsp()->block_size(p);
+}
+
 inline HeapWord*
 G1BlockOffsetArray::block_at_or_preceding(const void* addr,
                                           bool has_max_index,
@@ -88,7 +95,7 @@
     // to go back by.
     size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset);
     q -= (N_words * n_cards_back);
-    assert(q >= _sp->bottom(), "Went below bottom!");
+    assert(q >= gsp()->bottom(), "Went below bottom!");
     index -= n_cards_back;
     offset = _array->offset_array(index);
   }
@@ -101,21 +108,12 @@
 G1BlockOffsetArray::
 forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
                                        const void* addr) const {
-  if (csp() != NULL) {
-    if (addr >= csp()->top()) return csp()->top();
-    while (n <= addr) {
-      q = n;
-      oop obj = oop(q);
-      if (obj->klass_or_null() == NULL) return q;
-      n += obj->size();
-    }
-  } else {
-    while (n <= addr) {
-      q = n;
-      oop obj = oop(q);
-      if (obj->klass_or_null() == NULL) return q;
-      n += _sp->block_size(q);
-    }
+  if (addr >= gsp()->top()) return gsp()->top();
+  while (n <= addr) {
+    q = n;
+    oop obj = oop(q);
+    if (obj->klass_or_null() == NULL) return q;
+    n += block_size(q);
   }
   assert(q <= n, "wrong order for q and addr");
   assert(addr < n, "wrong order for addr and n");
@@ -126,7 +124,7 @@
 G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
                                                      const void* addr) {
   if (oop(q)->klass_or_null() == NULL) return q;
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   // In the normal case, where the query "addr" is a card boundary, and the
   // offset table chunks are the same size as cards, the block starting at
   // "q" will contain addr, so the test below will fail, and we'll fall
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp
--- a/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -30,49 +30,73 @@
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
-G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL) {
+G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL), _free(NULL) {
   _top = bottom();
 }
 
 void G1CodeRootChunk::reset() {
   _next = _prev = NULL;
+  _free = NULL;
   _top = bottom();
 }
 
 void G1CodeRootChunk::nmethods_do(CodeBlobClosure* cl) {
-  nmethod** cur = bottom();
+  NmethodOrLink* cur = bottom();
   while (cur != _top) {
-    cl->do_code_blob(*cur);
+    if (is_nmethod(cur)) {
+      cl->do_code_blob(cur->_nmethod);
+    }
     cur++;
   }
 }
 
-FreeList<G1CodeRootChunk> G1CodeRootSet::_free_list;
-size_t G1CodeRootSet::_num_chunks_handed_out = 0;
+bool G1CodeRootChunk::remove_lock_free(nmethod* method) {
+  NmethodOrLink* cur = bottom();  // single cursor for the scan below (no shadowing redeclaration)
+
+  for (; cur != _top; cur++) {
+    if (cur->_nmethod == method) {
+      bool result = Atomic::cmpxchg_ptr(NULL, &cur->_nmethod, method) == method;
+
+      if (!result) {
+        // Someone else cleared out this entry.
+        return false;
+      }
 
-G1CodeRootChunk* G1CodeRootSet::new_chunk() {
-  G1CodeRootChunk* result = _free_list.get_chunk_at_head();
-  if (result == NULL) {
-    result = new G1CodeRootChunk();
+      // The method was cleared. Time to link it into the free list.
+      NmethodOrLink* prev_free;
+      do {
+        prev_free = (NmethodOrLink*)_free;
+        cur->_link = prev_free;
+      } while (Atomic::cmpxchg_ptr(cur, &_free, prev_free) != prev_free);
+
+      return true;
+    }
   }
-  G1CodeRootSet::_num_chunks_handed_out++;
-  result->reset();
-  return result;
+
+  return false;
 }
 
-void G1CodeRootSet::free_chunk(G1CodeRootChunk* chunk) {
-  _free_list.return_chunk_at_head(chunk);
-  G1CodeRootSet::_num_chunks_handed_out--;
+G1CodeRootChunkManager::G1CodeRootChunkManager() : _free_list(), _num_chunks_handed_out(0) {
+  _free_list.initialize();
+  _free_list.set_size(G1CodeRootChunk::word_size());
 }
 
-void G1CodeRootSet::free_all_chunks(FreeList<G1CodeRootChunk>* list) {
-  G1CodeRootSet::_num_chunks_handed_out -= list->count();
+size_t G1CodeRootChunkManager::fl_mem_size() {
+  return _free_list.count() * _free_list.size();
+}
+
+void G1CodeRootChunkManager::free_all_chunks(FreeList<G1CodeRootChunk>* list) {
+  _num_chunks_handed_out -= list->count();
   _free_list.prepend(list);
 }
 
-void G1CodeRootSet::purge_chunks(size_t keep_ratio) {
-  size_t keep = G1CodeRootSet::_num_chunks_handed_out * keep_ratio / 100;
+void G1CodeRootChunkManager::free_chunk(G1CodeRootChunk* chunk) {
+  _free_list.return_chunk_at_head(chunk);
+  _num_chunks_handed_out--;
+}
 
+void G1CodeRootChunkManager::purge_chunks(size_t keep_ratio) {
+  size_t keep = _num_chunks_handed_out * keep_ratio / 100;
   if (keep >= (size_t)_free_list.count()) {
     return;
   }
@@ -90,20 +114,51 @@
   }
 }
 
-size_t G1CodeRootSet::static_mem_size() {
-  return sizeof(_free_list) + sizeof(_num_chunks_handed_out);
+size_t G1CodeRootChunkManager::static_mem_size() {
+  return sizeof(G1CodeRootChunkManager);
+}
+
+
+G1CodeRootChunk* G1CodeRootChunkManager::new_chunk() {
+  G1CodeRootChunk* result = _free_list.get_chunk_at_head();
+  if (result == NULL) {
+    result = new G1CodeRootChunk();
+  }
+  _num_chunks_handed_out++;
+  result->reset();
+  return result;
+}
+
+#ifndef PRODUCT
+
+size_t G1CodeRootChunkManager::num_chunks_handed_out() const {
+  return _num_chunks_handed_out;
 }
 
-size_t G1CodeRootSet::fl_mem_size() {
-  return _free_list.count() * _free_list.size();
+size_t G1CodeRootChunkManager::num_free_chunks() const {
+  return (size_t)_free_list.count();
+}
+
+#endif
+
+G1CodeRootChunkManager G1CodeRootSet::_default_chunk_manager;
+
+void G1CodeRootSet::purge_chunks(size_t keep_ratio) {
+  _default_chunk_manager.purge_chunks(keep_ratio);
 }
 
-void G1CodeRootSet::initialize() {
-  _free_list.initialize();
-  _free_list.set_size(G1CodeRootChunk::word_size());
+size_t G1CodeRootSet::free_chunks_static_mem_size() {
+  return _default_chunk_manager.static_mem_size();
 }
 
-G1CodeRootSet::G1CodeRootSet() : _list(), _length(0) {
+size_t G1CodeRootSet::free_chunks_mem_size() {
+  return _default_chunk_manager.fl_mem_size();
+}
+
+G1CodeRootSet::G1CodeRootSet(G1CodeRootChunkManager* manager) : _length(0), _list(), _manager(manager) {
+  if (_manager == NULL) {
+    _manager = &_default_chunk_manager;  // no manager supplied: use the shared default instance
+  }
   _list.initialize();
   _list.set_size(G1CodeRootChunk::word_size());
 }
@@ -114,34 +169,43 @@
 
 void G1CodeRootSet::add(nmethod* method) {
   if (!contains(method)) {
-    // Try to add the nmethod. If there is not enough space, get a new chunk.
-    if (_list.head() == NULL || _list.head()->is_full()) {
-      G1CodeRootChunk* cur = new_chunk();
+    // Find the first chunk that isn't full.
+    G1CodeRootChunk* cur = _list.head();
+    while (cur != NULL) {
+      if (!cur->is_full()) {
+        break;
+      }
+      cur = cur->next();
+    }
+
+    // All chunks are full, get a new chunk.
+    if (cur == NULL) {
+      cur = new_chunk();
       _list.return_chunk_at_head(cur);
     }
-    bool result = _list.head()->add(method);
+
+    // Add the nmethod.
+    bool result = cur->add(method);
+
     guarantee(result, err_msg("Not able to add nmethod "PTR_FORMAT" to newly allocated chunk.", method));
+
     _length++;
   }
 }
 
-void G1CodeRootSet::remove(nmethod* method) {
+void G1CodeRootSet::remove_lock_free(nmethod* method) {
   G1CodeRootChunk* found = find(method);
   if (found != NULL) {
-    bool result = found->remove(method);
-    guarantee(result, err_msg("could not find nmethod "PTR_FORMAT" during removal although we previously found it", method));
-    // eventually free completely emptied chunk
-    if (found->is_empty()) {
-      _list.remove_chunk(found);
-      free(found);
+    bool result = found->remove_lock_free(method);
+    if (result) {
+      Atomic::dec_ptr((volatile intptr_t*)&_length);
     }
-    _length--;
   }
   assert(!contains(method), err_msg(PTR_FORMAT" still contains nmethod "PTR_FORMAT, this, method));
 }
 
 nmethod* G1CodeRootSet::pop() {
-  do {
+  while (true) {
     G1CodeRootChunk* cur = _list.head();
     if (cur == NULL) {
       assert(_length == 0, "when there are no chunks, there should be no elements");
@@ -154,7 +218,7 @@
     } else {
       free(_list.get_chunk_at_head());
     }
-  } while (true);
+  }
 }
 
 G1CodeRootChunk* G1CodeRootSet::find(nmethod* method) {
@@ -189,28 +253,38 @@
   }
 }
 
+size_t G1CodeRootSet::static_mem_size() {
+  return sizeof(G1CodeRootSet);
+}
+
 size_t G1CodeRootSet::mem_size() {
-  return sizeof(this) + _list.count() * _list.size();
+  return G1CodeRootSet::static_mem_size() + _list.count() * _list.size();
 }
 
 #ifndef PRODUCT
 
 void G1CodeRootSet::test() {
-  initialize();
+  G1CodeRootChunkManager mgr;
 
-  assert(_free_list.count() == 0, "Free List must be empty");
-  assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet");
+  assert(mgr.num_chunks_handed_out() == 0, "Must not have handed out chunks yet");
+
+  assert(G1CodeRootChunkManager::static_mem_size() > sizeof(void*),
+         err_msg("The chunk manager's static memory usage seems too small, is only "SIZE_FORMAT" bytes.", G1CodeRootChunkManager::static_mem_size()));
 
   // The number of chunks that we allocate for purge testing.
   size_t const num_chunks = 10;
+
   {
-    G1CodeRootSet set1;
+    G1CodeRootSet set1(&mgr);
     assert(set1.is_empty(), "Code root set must be initially empty but is not.");
 
+    assert(G1CodeRootSet::static_mem_size() > sizeof(void*),
+           err_msg("The code root set's static memory usage seems too small, is only "SIZE_FORMAT" bytes", G1CodeRootSet::static_mem_size()));
+
     set1.add((nmethod*)1);
-    assert(_num_chunks_handed_out == 1,
+    assert(mgr.num_chunks_handed_out() == 1,
            err_msg("Must have allocated and handed out one chunk, but handed out "
-                   SIZE_FORMAT" chunks", _num_chunks_handed_out));
+                   SIZE_FORMAT" chunks", mgr.num_chunks_handed_out()));
     assert(set1.length() == 1, err_msg("Added exactly one element, but set contains "
                                        SIZE_FORMAT" elements", set1.length()));
 
@@ -219,19 +293,19 @@
     for (uint i = 0; i < G1CodeRootChunk::word_size() + 1; i++) {
       set1.add((nmethod*)1);
     }
-    assert(_num_chunks_handed_out == 1,
+    assert(mgr.num_chunks_handed_out() == 1,
            err_msg("Duplicate detection must have prevented allocation of further "
-                   "chunks but contains "SIZE_FORMAT, _num_chunks_handed_out));
+                   "chunks but allocated "SIZE_FORMAT, mgr.num_chunks_handed_out()));
     assert(set1.length() == 1,
            err_msg("Duplicate detection should not have increased the set size but "
                    "is "SIZE_FORMAT, set1.length()));
 
     size_t num_total_after_add = G1CodeRootChunk::word_size() + 1;
     for (size_t i = 0; i < num_total_after_add - 1; i++) {
-      set1.add((nmethod*)(2 + i));
+      set1.add((nmethod*)(uintptr_t)(2 + i));
     }
-    assert(_num_chunks_handed_out > 1,
-           "After adding more code roots, more than one chunks should have been handed out");
+    assert(mgr.num_chunks_handed_out() > 1,
+           "After adding more code roots, more than one additional chunk should have been handed out");
     assert(set1.length() == num_total_after_add,
            err_msg("After adding in total "SIZE_FORMAT" distinct code roots, they "
                    "need to be in the set, but there are only "SIZE_FORMAT,
@@ -244,27 +318,27 @@
     assert(num_popped == num_total_after_add,
            err_msg("Managed to pop "SIZE_FORMAT" code roots, but only "SIZE_FORMAT" "
                    "were added", num_popped, num_total_after_add));
-    assert(_num_chunks_handed_out == 0,
+    assert(mgr.num_chunks_handed_out() == 0,
            err_msg("After popping all elements, all chunks must have been returned "
-                   "but are still "SIZE_FORMAT, _num_chunks_handed_out));
+                   "but there are still "SIZE_FORMAT" additional", mgr.num_chunks_handed_out()));
 
-    purge_chunks(0);
-    assert(_free_list.count() == 0,
+    mgr.purge_chunks(0);
+    assert(mgr.num_free_chunks() == 0,
            err_msg("After purging everything, the free list must be empty but still "
-                   "contains "SIZE_FORMAT" chunks", _free_list.count()));
+                   "contains "SIZE_FORMAT" chunks", mgr.num_free_chunks()));
 
     // Add some more handed out chunks.
     size_t i = 0;
-    while (_num_chunks_handed_out < num_chunks) {
+    while (mgr.num_chunks_handed_out() < num_chunks) {
       set1.add((nmethod*)i);
       i++;
     }
 
     {
       // Generate chunks on the free list.
-      G1CodeRootSet set2;
+      G1CodeRootSet set2(&mgr);
       size_t i = 0;
-      while (_num_chunks_handed_out < num_chunks * 2) {
+      while (mgr.num_chunks_handed_out() < (num_chunks * 2)) {
         set2.add((nmethod*)i);
         i++;
       }
@@ -272,45 +346,45 @@
       // num_chunks elements on the free list.
     }
 
-    assert(_num_chunks_handed_out == num_chunks,
+    assert(mgr.num_chunks_handed_out() == num_chunks,
            err_msg("Deletion of the second set must have resulted in giving back "
-                   "those, but there is still "SIZE_FORMAT" handed out, expecting "
-                   SIZE_FORMAT, _num_chunks_handed_out, num_chunks));
-    assert((size_t)_free_list.count() == num_chunks,
+                   "those, but there are still "SIZE_FORMAT" additional handed out, expecting "
+                   SIZE_FORMAT, mgr.num_chunks_handed_out(), num_chunks));
+    assert(mgr.num_free_chunks() == num_chunks,
            err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
-                   "but there are only "SIZE_FORMAT, num_chunks, _free_list.count()));
+                   "but there are only "SIZE_FORMAT, num_chunks, mgr.num_free_chunks()));
 
     size_t const test_percentage = 50;
-    purge_chunks(test_percentage);
-    assert(_num_chunks_handed_out == num_chunks,
+    mgr.purge_chunks(test_percentage);
+    assert(mgr.num_chunks_handed_out() == num_chunks,
            err_msg("Purging must not hand out chunks but there are "SIZE_FORMAT,
-                   _num_chunks_handed_out));
-    assert((size_t)_free_list.count() == (ssize_t)(num_chunks * test_percentage / 100),
+                   mgr.num_chunks_handed_out()));
+    assert(mgr.num_free_chunks() == (size_t)(mgr.num_chunks_handed_out() * test_percentage / 100),
            err_msg("Must have purged "SIZE_FORMAT" percent of "SIZE_FORMAT" chunks"
-                   "but there are "SSIZE_FORMAT, test_percentage, num_chunks,
-                   _free_list.count()));
+                   " but there are "SIZE_FORMAT, test_percentage, num_chunks,
+                   mgr.num_free_chunks()));
     // Purge the remainder of the chunks on the free list.
-    purge_chunks(0);
-    assert(_free_list.count() == 0, "Free List must be empty");
-    assert(_num_chunks_handed_out == num_chunks,
+    mgr.purge_chunks(0);
+    assert(mgr.num_free_chunks() == 0, "Free List must be empty");
+    assert(mgr.num_chunks_handed_out() == num_chunks,
            err_msg("Expected to be "SIZE_FORMAT" chunks handed out from the first set "
-                   "but there are "SIZE_FORMAT, num_chunks, _num_chunks_handed_out));
+                   "but there are "SIZE_FORMAT, num_chunks, mgr.num_chunks_handed_out()));
 
     // Exit of the scope of the set1 object will call the destructor that generates
     // num_chunks additional elements on the free list.
-  }
+   }
 
-  assert(_num_chunks_handed_out == 0,
+  assert(mgr.num_chunks_handed_out() == 0,
          err_msg("Deletion of the only set must have resulted in no chunks handed "
-                 "out, but there is still "SIZE_FORMAT" handed out", _num_chunks_handed_out));
-  assert((size_t)_free_list.count() == num_chunks,
+                 "out, but there is still "SIZE_FORMAT" handed out", mgr.num_chunks_handed_out()));
+  assert(mgr.num_free_chunks() == num_chunks,
          err_msg("After freeing "SIZE_FORMAT" chunks, they must be on the free list "
-                 "but there are only "SSIZE_FORMAT, num_chunks, _free_list.count()));
+                 "but there are only "SIZE_FORMAT, num_chunks, mgr.num_free_chunks()));
 
   // Restore initial state.
-  purge_chunks(0);
-  assert(_free_list.count() == 0, "Free List must be empty");
-  assert(_num_chunks_handed_out == 0, "No elements must have been handed out yet");
+  mgr.purge_chunks(0);
+  assert(mgr.num_free_chunks() == 0, "Free List must be empty");
+  assert(mgr.num_chunks_handed_out() == 0, "No additional elements must have been handed out yet");
 }
 
 void TestCodeCacheRemSet_test() {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp
--- a/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -31,6 +31,14 @@
 
 class CodeBlobClosure;
 
+// The elements of the G1CodeRootChunk are either:
+//  1) nmethod pointers
+//  2) nodes in an internally chained free list
+typedef union {
+  nmethod* _nmethod;
+  void*    _link;
+} NmethodOrLink;
+
 class G1CodeRootChunk : public CHeapObj<mtGC> {
  private:
   static const int NUM_ENTRIES = 32;
@@ -38,16 +46,28 @@
   G1CodeRootChunk*     _next;
   G1CodeRootChunk*     _prev;
 
-  nmethod** _top;
+  NmethodOrLink*          _top;
+  // First free position within the chunk.
+  volatile NmethodOrLink* _free;
 
-  nmethod* _data[NUM_ENTRIES];
+  NmethodOrLink _data[NUM_ENTRIES];
 
-  nmethod** bottom() const {
-    return (nmethod**) &(_data[0]);
+  NmethodOrLink* bottom() const {
+    return (NmethodOrLink*) &(_data[0]);
   }
 
-  nmethod** end() const {
-    return (nmethod**) &(_data[NUM_ENTRIES]);
+  NmethodOrLink* end() const {
+    return (NmethodOrLink*) &(_data[NUM_ENTRIES]);
+  }
+
+  bool is_link(NmethodOrLink* nmethod_or_link) {
+    return nmethod_or_link->_link == NULL ||
+        (bottom() <= nmethod_or_link->_link
+        && nmethod_or_link->_link < end());
+  }
+
+  bool is_nmethod(NmethodOrLink* nmethod_or_link) {
+    return !is_link(nmethod_or_link);
   }
 
  public:
@@ -85,62 +105,97 @@
   }
 
   bool is_full() const {
-    return _top == (nmethod**)end();
+    return _top == end() && _free == NULL;
   }
 
   bool contains(nmethod* method) {
-    nmethod** cur = bottom();
+    NmethodOrLink* cur = bottom();
     while (cur != _top) {
-      if (*cur == method) return true;
+      if (cur->_nmethod == method) return true;
       cur++;
     }
     return false;
   }
 
   bool add(nmethod* method) {
-    if (is_full()) return false;
-    *_top = method;
-    _top++;
+    if (is_full()) {
+      return false;
+    }
+
+    if (_free != NULL) {
+      // Take from internally chained free list
+      NmethodOrLink* first_free = (NmethodOrLink*)_free;
+      _free = (NmethodOrLink*)_free->_link;
+      first_free->_nmethod = method;
+    } else {
+      // Take from top.
+      _top->_nmethod = method;
+      _top++;
+    }
+
     return true;
   }
 
-  bool remove(nmethod* method) {
-    nmethod** cur = bottom();
-    while (cur != _top) {
-      if (*cur == method) {
-        memmove(cur, cur + 1, (_top - (cur + 1)) * sizeof(nmethod**));
-        _top--;
-        return true;
-      }
-      cur++;
-    }
-    return false;
-  }
+  bool remove_lock_free(nmethod* method);
 
   void nmethods_do(CodeBlobClosure* blk);
 
   nmethod* pop() {
-    if (is_empty()) {
-      return NULL;
+    if (_free != NULL) {
+      // Kill the free list.
+      _free = NULL;
+    }
+
+    while (!is_empty()) {
+      _top--;
+      if (is_nmethod(_top)) {
+        return _top->_nmethod;
+      }
     }
-    _top--;
-    return *_top;
+
+    return NULL;
   }
 };
 
+// Manages free chunks.
+class G1CodeRootChunkManager VALUE_OBJ_CLASS_SPEC {
+ private:
+  // Global free chunk list management
+  FreeList<G1CodeRootChunk> _free_list;
+  // Total number of chunks handed out
+  size_t _num_chunks_handed_out;
+
+ public:
+  G1CodeRootChunkManager();
+
+  G1CodeRootChunk* new_chunk();
+  void free_chunk(G1CodeRootChunk* chunk);
+  // Free all elements of the given list.
+  void free_all_chunks(FreeList<G1CodeRootChunk>* list);
+
+  void initialize();
+  void purge_chunks(size_t keep_ratio);
+
+  static size_t static_mem_size();
+  size_t fl_mem_size();
+
+#ifndef PRODUCT
+  size_t num_chunks_handed_out() const;
+  size_t num_free_chunks() const;
+#endif
+};
+
 // Implements storage for a set of code roots.
 // All methods that modify the set are not thread-safe except if otherwise noted.
 class G1CodeRootSet VALUE_OBJ_CLASS_SPEC {
  private:
-  // Global free chunk list management
-  static FreeList<G1CodeRootChunk> _free_list;
-  // Total number of chunks handed out
-  static size_t _num_chunks_handed_out;
+  // Global default free chunk manager instance.
+  static G1CodeRootChunkManager _default_chunk_manager;
 
-  static G1CodeRootChunk* new_chunk();
-  static void free_chunk(G1CodeRootChunk* chunk);
+  G1CodeRootChunk* new_chunk() { return _manager->new_chunk(); }
+  void free_chunk(G1CodeRootChunk* chunk) { _manager->free_chunk(chunk); }
   // Free all elements of the given list.
-  static void free_all_chunks(FreeList<G1CodeRootChunk>* list);
+  void free_all_chunks(FreeList<G1CodeRootChunk>* list) { _manager->free_all_chunks(list); }
 
   // Return the chunk that contains the given nmethod, NULL otherwise.
   // Scans the list of chunks backwards, as this method is used to add new
@@ -150,22 +205,24 @@
 
   size_t _length;
   FreeList<G1CodeRootChunk> _list;
+  G1CodeRootChunkManager* _manager;
 
  public:
-  G1CodeRootSet();
+  // If an instance is initialized with a chunk manager of NULL, use the global
+  // default one.
+  G1CodeRootSet(G1CodeRootChunkManager* manager = NULL);
   ~G1CodeRootSet();
 
-  static void initialize();
   static void purge_chunks(size_t keep_ratio);
 
-  static size_t static_mem_size();
-  static size_t fl_mem_size();
+  static size_t free_chunks_static_mem_size();
+  static size_t free_chunks_mem_size();
 
   // Search for the code blob from the recently allocated ones to find duplicates more quickly, as this
   // method is likely to be repeatedly called with the same nmethod.
   void add(nmethod* method);
 
-  void remove(nmethod* method);
+  void remove_lock_free(nmethod* method);
   nmethod* pop();
 
   bool contains(nmethod* method);
@@ -179,6 +236,8 @@
   // Length in elements
   size_t length() const { return _length; }
 
+  // Static data memory size in bytes of this set.
+  static size_t static_mem_size();
   // Memory size in bytes taken by this set.
   size_t mem_size();
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -42,6 +42,7 @@
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
+#include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
@@ -54,14 +55,15 @@
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
 #include "gc_implementation/shared/isGCActiveMark.hpp"
+#include "memory/allocation.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "memory/generationSpec.hpp"
 #include "memory/iterator.hpp"
 #include "memory/referenceProcessor.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/oop.pcgc.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/vmThread.hpp"
-#include "utilities/ticks.hpp"
 
 size_t G1CollectedHeap::_humongous_object_threshold_in_words = 0;
 
@@ -86,66 +88,64 @@
 // G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
 // The number of GC workers is passed to heap_region_par_iterate_chunked().
 // It does use run_task() which sets _n_workers in the task.
-// G1ParTask executes g1_process_strong_roots() ->
-// SharedHeap::process_strong_roots() which calls eventually to
+// G1ParTask executes g1_process_roots() ->
+// SharedHeap::process_roots() which calls eventually to
 // CardTableModRefBS::par_non_clean_card_iterate_work() which uses
-// SequentialSubTasksDone.  SharedHeap::process_strong_roots() also
+// SequentialSubTasksDone.  SharedHeap::process_roots() also
 // directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
 //
 
 // Local to this file.
 
 class RefineCardTableEntryClosure: public CardTableEntryClosure {
-  SuspendibleThreadSet* _sts;
-  G1RemSet* _g1rs;
-  ConcurrentG1Refine* _cg1r;
   bool _concurrent;
 public:
-  RefineCardTableEntryClosure(SuspendibleThreadSet* sts,
-                              G1RemSet* g1rs,
-                              ConcurrentG1Refine* cg1r) :
-    _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true)
-  {}
+  RefineCardTableEntryClosure() : _concurrent(true) { }
+
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
-    bool oops_into_cset = _g1rs->refine_card(card_ptr, worker_i, false);
+    bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, false);
     // This path is executed by the concurrent refine or mutator threads,
     // concurrently, and so we do not care if card_ptr contains references
     // that point into the collection set.
     assert(!oops_into_cset, "should be");
 
-    if (_concurrent && _sts->should_yield()) {
+    if (_concurrent && SuspendibleThreadSet::should_yield()) {
       // Caller will actually yield.
       return false;
     }
     // Otherwise, we finished successfully; return true.
     return true;
   }
+
   void set_concurrent(bool b) { _concurrent = b; }
 };
 
 
 class ClearLoggedCardTableEntryClosure: public CardTableEntryClosure {
-  int _calls;
-  G1CollectedHeap* _g1h;
+  size_t _num_processed;
   CardTableModRefBS* _ctbs;
   int _histo[256];
-public:
+
+ public:
   ClearLoggedCardTableEntryClosure() :
-    _calls(0), _g1h(G1CollectedHeap::heap()), _ctbs(_g1h->g1_barrier_set())
+    _num_processed(0), _ctbs(G1CollectedHeap::heap()->g1_barrier_set())
   {
     for (int i = 0; i < 256; i++) _histo[i] = 0;
   }
+
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
-    if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) {
-      _calls++;
-      unsigned char* ujb = (unsigned char*)card_ptr;
-      int ind = (int)(*ujb);
-      _histo[ind]++;
-      *card_ptr = -1;
-    }
+    unsigned char* ujb = (unsigned char*)card_ptr;
+    int ind = (int)(*ujb);
+    _histo[ind]++;
+
+    *card_ptr = (jbyte)CardTableModRefBS::clean_card_val();
+    _num_processed++;
+
     return true;
   }
-  int calls() { return _calls; }
+
+  size_t num_processed() { return _num_processed; }
+
   void print_histo() {
     gclog_or_tty->print_cr("Card table value histogram:");
     for (int i = 0; i < 256; i++) {
@@ -156,22 +156,20 @@
   }
 };
 
-class RedirtyLoggedCardTableEntryClosure: public CardTableEntryClosure {
-  int _calls;
-  G1CollectedHeap* _g1h;
-  CardTableModRefBS* _ctbs;
-public:
-  RedirtyLoggedCardTableEntryClosure() :
-    _calls(0), _g1h(G1CollectedHeap::heap()), _ctbs(_g1h->g1_barrier_set()) {}
+class RedirtyLoggedCardTableEntryClosure : public CardTableEntryClosure {
+ private:
+  size_t _num_processed;
+
+ public:
+  RedirtyLoggedCardTableEntryClosure() : CardTableEntryClosure(), _num_processed(0) { }
 
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
-    if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) {
-      _calls++;
-      *card_ptr = 0;
-    }
+    *card_ptr = CardTableModRefBS::dirty_card_val();
+    _num_processed++;
     return true;
   }
-  int calls() { return _calls; }
+
+  size_t num_processed() const { return _num_processed; }
 };
 
 YoungList::YoungList(G1CollectedHeap* g1h) :
@@ -475,9 +473,8 @@
 
   // First clear the logged cards.
   ClearLoggedCardTableEntryClosure clear;
-  dcqs.set_closure(&clear);
-  dcqs.apply_closure_to_all_completed_buffers();
-  dcqs.iterate_closure_all_threads(false);
+  dcqs.apply_closure_to_all_completed_buffers(&clear);
+  dcqs.iterate_closure_all_threads(&clear, false);
   clear.print_histo();
 
   // Now ensure that there's no dirty cards.
@@ -490,13 +487,13 @@
   guarantee(count2.n() == 0, "Card table should be clean.");
 
   RedirtyLoggedCardTableEntryClosure redirty;
-  JavaThread::dirty_card_queue_set().set_closure(&redirty);
-  dcqs.apply_closure_to_all_completed_buffers();
-  dcqs.iterate_closure_all_threads(false);
-  gclog_or_tty->print_cr("Log entries = %d, dirty cards = %d.",
-                         clear.calls(), orig_count);
-  guarantee(redirty.calls() == clear.calls(),
-            "Or else mechanism is broken.");
+  dcqs.apply_closure_to_all_completed_buffers(&redirty);
+  dcqs.iterate_closure_all_threads(&redirty, false);
+  gclog_or_tty->print_cr("Log entries = " SIZE_FORMAT ", dirty cards = %d.",
+                         clear.num_processed(), orig_count);
+  guarantee(redirty.num_processed() == clear.num_processed(),
+            err_msg("Redirtied " SIZE_FORMAT " cards, but cleared " SIZE_FORMAT,
+                    redirty.num_processed(), clear.num_processed()));
 
   CountNonCleanMemRegionClosure count3(this);
   ct_bs->mod_card_iterate(&count3);
@@ -505,8 +502,6 @@
                            orig_count, count3.n());
     guarantee(count3.n() >= orig_count, "Should have restored them all.");
   }
-
-  JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl);
 }
 
 // Private class members.
@@ -776,6 +771,7 @@
   // match new_top.
   assert(hr == NULL ||
          (hr->end() == new_end && hr->top() == new_top), "sanity");
+  check_bitmaps("Humongous Region Allocation", first_hr);
 
   assert(first_hr->used() == word_size * HeapWordSize, "invariant");
   _summary_bytes_used += first_hr->used();
@@ -1305,7 +1301,7 @@
     TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
 
     {
-      GCTraceTime t(GCCauseString("Full GC", gc_cause()), G1Log::fine(), true, NULL);
+      GCTraceTime t(GCCauseString("Full GC", gc_cause()), G1Log::fine(), true, NULL, gc_tracer->gc_id());
       TraceCollectorStats tcs(g1mm()->full_collection_counters());
       TraceMemoryManagerStats tms(true /* fullGC */, gc_cause());
 
@@ -1334,6 +1330,7 @@
 
       verify_before_gc();
 
+      check_bitmaps("Full GC Start");
       pre_full_gc_dump(gc_timer);
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
@@ -1507,13 +1504,22 @@
 
       verify_after_gc();
 
+      // Clear the previous marking bitmap, if needed for bitmap verification.
+      // Note we cannot do this when we clear the next marking bitmap in
+      // ConcurrentMark::abort() above since VerifyDuringGC verifies the
+      // objects marked during a full GC against the previous bitmap.
+      // But we need to clear it before calling check_bitmaps below since
+      // the full GC has compacted objects and updated TAMS but not updated
+      // the prev bitmap.
+      if (G1VerifyBitmaps) {
+        ((CMBitMap*) concurrent_mark()->prevMarkBitMap())->clearAll();
+      }
+      check_bitmaps("Full GC End");
+
       // Start a new incremental collection set for the next pause
       assert(g1_policy()->collection_set() == NULL, "must be");
       g1_policy()->start_incremental_cset_building();
 
-      // Clear the _cset_fast_test bitmap in anticipation of adding
-      // regions to the incremental collection set for the next
-      // evacuation pause.
       clear_cset_fast_test();
 
       init_mutator_alloc_region();
@@ -1922,6 +1928,8 @@
   _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
   _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
   _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
+  _humongous_is_live(),
+  _has_humongous_reclaim_candidates(false),
   _free_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
@@ -1933,8 +1941,7 @@
   _old_marking_cycles_started(0),
   _old_marking_cycles_completed(0),
   _concurrent_cycle_started(false),
-  _in_cset_fast_test(NULL),
-  _in_cset_fast_test_base(NULL),
+  _in_cset_fast_test(),
   _dirty_cards_region_list(NULL),
   _worker_cset_start_region(NULL),
   _worker_cset_start_region_time_stamp(NULL),
@@ -2004,7 +2011,9 @@
   Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
   Universe::check_alignment(max_byte_size, heap_alignment, "g1 heap");
 
-  _cg1r = new ConcurrentG1Refine(this);
+  _refine_cte_cl = new RefineCardTableEntryClosure();
+
+  _cg1r = new ConcurrentG1Refine(this, _refine_cte_cl);
 
   // Reserve the maximum.
 
@@ -2076,20 +2085,8 @@
 
   _g1h = this;
 
-  _in_cset_fast_test_length = max_regions();
-  _in_cset_fast_test_base =
-                   NEW_C_HEAP_ARRAY(bool, (size_t) _in_cset_fast_test_length, mtGC);
-
-  // We're biasing _in_cset_fast_test to avoid subtracting the
-  // beginning of the heap every time we want to index; basically
-  // it's the same with what we do with the card table.
-  _in_cset_fast_test = _in_cset_fast_test_base -
-               ((uintx) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
-
-  // Clear the _cset_fast_test bitmap in anticipation of adding
-  // regions to the incremental collection set for the first
-  // evacuation pause.
-  clear_cset_fast_test();
+  _in_cset_fast_test.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);
+  _humongous_is_live.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
@@ -2112,25 +2109,21 @@
   // Perform any initialization actions delegated to the policy.
   g1_policy()->init();
 
-  _refine_cte_cl =
-    new RefineCardTableEntryClosure(ConcurrentG1RefineThread::sts(),
-                                    g1_rem_set(),
-                                    concurrent_g1_refine());
-  JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl);
-
   JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
                                                SATB_Q_FL_lock,
                                                G1SATBProcessCompletedThreshold,
                                                Shared_SATB_Q_lock);
 
-  JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+  JavaThread::dirty_card_queue_set().initialize(_refine_cte_cl,
+                                                DirtyCardQ_CBL_mon,
                                                 DirtyCardQ_FL_lock,
                                                 concurrent_g1_refine()->yellow_zone(),
                                                 concurrent_g1_refine()->red_zone(),
                                                 Shared_DirtyCardQ_lock);
 
   if (G1DeferredRSUpdate) {
-    dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+    dirty_card_queue_set().initialize(NULL, // Should never be called by the Java code
+                                      DirtyCardQ_CBL_mon,
                                       DirtyCardQ_FL_lock,
                                       -1, // never trigger processing
                                       -1, // no limit on length
@@ -2140,7 +2133,8 @@
 
   // Initialize the card queue set used to hold cards containing
   // references into the collection set.
-  _into_cset_dirty_card_queue_set.initialize(DirtyCardQ_CBL_mon,
+  _into_cset_dirty_card_queue_set.initialize(NULL, // Should never be called by the Java code
+                                             DirtyCardQ_CBL_mon,
                                              DirtyCardQ_FL_lock,
                                              -1, // never trigger processing
                                              -1, // no limit on length
@@ -2188,6 +2182,11 @@
   }
 }
 
+void G1CollectedHeap::clear_humongous_is_live_table() {
+  guarantee(G1ReclaimDeadHumongousObjectsAtYoungGC, "Should only be called if true");
+  _humongous_is_live.clear();
+}
+
 size_t G1CollectedHeap::conservative_max_heap_alignment() {
   return HeapRegion::max_region_size();
 }
@@ -2604,15 +2603,12 @@
 
 // Iteration functions.
 
-// Iterates an OopClosure over all ref-containing fields of objects
-// within a HeapRegion.
+// Applies an ExtendedOopClosure onto all references of objects within a HeapRegion.
 
 class IterateOopClosureRegionClosure: public HeapRegionClosure {
-  MemRegion _mr;
   ExtendedOopClosure* _cl;
 public:
-  IterateOopClosureRegionClosure(MemRegion mr, ExtendedOopClosure* cl)
-    : _mr(mr), _cl(cl) {}
+  IterateOopClosureRegionClosure(ExtendedOopClosure* cl) : _cl(cl) {}
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
       r->oop_iterate(_cl);
@@ -2622,12 +2618,7 @@
 };
 
 void G1CollectedHeap::oop_iterate(ExtendedOopClosure* cl) {
-  IterateOopClosureRegionClosure blk(_g1_committed, cl);
-  heap_region_iterate(&blk);
-}
-
-void G1CollectedHeap::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  IterateOopClosureRegionClosure blk(mr, cl);
+  IterateOopClosureRegionClosure blk(cl);
   heap_region_iterate(&blk);
 }
 
@@ -2980,10 +2971,17 @@
   }
 }
 
-CompactibleSpace* G1CollectedHeap::first_compactible_space() {
-  return n_regions() > 0 ? region_at(0) : NULL;
-}
-
+HeapRegion* G1CollectedHeap::next_compaction_region(const HeapRegion* from) const {
+  // We're not using an iterator given that it will wrap around when
+  // it reaches the last region and this is not what we want here.
+  for (uint index = from->hrs_index() + 1; index < n_regions(); index++) {
+    HeapRegion* hr = region_at(index);
+    if (!hr->isHumongous()) {
+      return hr;
+    }
+  }
+  return NULL;
+}
 
 Space* G1CollectedHeap::space_containing(const void* addr) const {
   Space* res = heap_region_containing(addr);
@@ -3414,25 +3412,20 @@
 
     if (!silent) { gclog_or_tty->print("Roots "); }
     VerifyRootsClosure rootsCl(vo);
-    G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
-    G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
     VerifyKlassClosure klassCl(this, &rootsCl);
+    CLDToKlassAndOopClosure cldCl(&klassCl, &rootsCl, false);
 
     // We apply the relevant closures to all the oops in the
-    // system dictionary, the string table and the code cache.
-    const int so = SO_AllClasses | SO_Strings | SO_CodeCache;
-
-    // Need cleared claim bits for the strong roots processing
-    ClassLoaderDataGraph::clear_claimed_marks();
-
-    process_strong_roots(true,      // activate StrongRootsScope
-                         false,     // we set "is scavenging" to false,
-                                    // so we don't reset the dirty cards.
-                         ScanningOption(so),  // roots scanning options
-                         &rootsCl,
-                         &blobsCl,
-                         &klassCl
-                         );
+    // system dictionary, class loader data graph, the string table
+    // and the nmethods in the code cache.
+    G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
+    G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
+
+    process_all_roots(true,            // activate StrongRootsScope
+                      SO_AllCodeCache, // roots scanning options
+                      &rootsCl,
+                      &cldCl,
+                      &blobsCl);
 
     bool failures = rootsCl.failures() || codeRootsCl.failures();
 
@@ -3804,6 +3797,61 @@
   return g1_rem_set()->cardsScanned();
 }
 
+bool G1CollectedHeap::humongous_region_is_always_live(uint index) {
+  HeapRegion* region = region_at(index);
+  assert(region->startsHumongous(), "Must start a humongous object");
+  return oop(region->bottom())->is_objArray() || !region->rem_set()->is_empty();
+}
+
+class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
+ private:
+  size_t _total_humongous;
+  size_t _candidate_humongous;
+ public:
+  RegisterHumongousWithInCSetFastTestClosure() : _total_humongous(0), _candidate_humongous(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    uint region_idx = r->hrs_index();
+    bool is_candidate = !g1h->humongous_region_is_always_live(region_idx);
+    // is_candidate already filters out humongous regions with a non-empty remembered set.
+    // This will not lead to humongous objects that we mistakenly keep alive, because
+    // during young collection the remembered sets will only be added to.
+    if (is_candidate) {
+      g1h->register_humongous_region_with_in_cset_fast_test(region_idx);
+      _candidate_humongous++;
+    }
+    _total_humongous++;
+
+    return false;
+  }
+
+  size_t total_humongous() const { return _total_humongous; }
+  size_t candidate_humongous() const { return _candidate_humongous; }
+};
+
+void G1CollectedHeap::register_humongous_regions_with_in_cset_fast_test() {
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(0, 0);
+    return;
+  }
+
+  RegisterHumongousWithInCSetFastTestClosure cl;
+  heap_region_iterate(&cl);
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(cl.total_humongous(),
+                                                                  cl.candidate_humongous());
+  _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
+
+  if (_has_humongous_reclaim_candidates) {
+    clear_humongous_is_live_table();
+  }
+}
+
 void
 G1CollectedHeap::setup_surviving_young_words() {
   assert(_surviving_young_words == NULL, "pre-condition");
@@ -3893,8 +3941,7 @@
     return;
   }
 
-  gclog_or_tty->date_stamp(PrintGCDateStamps);
-  gclog_or_tty->stamp(PrintGCTimeStamps);
+  gclog_or_tty->gclog_stamp(_gc_tracer_stw->gc_id());
 
   GCCauseString gc_cause_str = GCCauseString("GC pause", gc_cause())
     .append(g1_policy()->gcs_are_young() ? "(young)" : "(mixed)")
@@ -4015,6 +4062,7 @@
       increment_gc_time_stamp();
 
       verify_before_gc();
+      check_bitmaps("GC Start");
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
 
@@ -4090,6 +4138,8 @@
 
         g1_policy()->finalize_cset(target_pause_time_ms, evacuation_info);
 
+        register_humongous_regions_with_in_cset_fast_test();
+
         _cm->note_start_of_gc();
         // We should not verify the per-thread SATB buffers given that
         // we have not filtered them yet (we'll do so during the
@@ -4140,6 +4190,9 @@
                                  true  /* verify_fingers */);
 
         free_collection_set(g1_policy()->collection_set(), evacuation_info);
+
+        eagerly_reclaim_humongous_regions();
+
         g1_policy()->clear_collection_set();
 
         cleanup_surviving_young_words();
@@ -4147,9 +4200,6 @@
         // Start a new incremental collection set for the next pause.
         g1_policy()->start_incremental_cset_building();
 
-        // Clear the _cset_fast_test bitmap in anticipation of adding
-        // regions to the incremental collection set for the next
-        // evacuation pause.
         clear_cset_fast_test();
 
         _young_list->reset_sampled_info();
@@ -4263,6 +4313,7 @@
         increment_gc_time_stamp();
 
         verify_after_gc();
+        check_bitmaps("GC End");
 
         assert(!ref_processor_stw()->discovery_enabled(), "Postcondition");
         ref_processor_stw()->verify_no_references_recorded();
@@ -4326,7 +4377,7 @@
     // this point does not assume that we are the only GC thread
     // running. Note: of course, the actual marking work will
     // not start until the safepoint itself is released in
-    // ConcurrentGCThread::safepoint_desynchronize().
+    // SuspendibleThreadSet::desynchronize().
     doConcurrentMark();
   }
 
@@ -4366,11 +4417,7 @@
   assert(_mutator_alloc_region.get() == NULL, "post-condition");
 }
 
-void G1CollectedHeap::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
-  assert_at_safepoint(true /* should_be_vm_thread */);
-
-  _survivor_gc_alloc_region.init();
-  _old_gc_alloc_region.init();
+void G1CollectedHeap::use_retained_old_gc_alloc_region(EvacuationInfo& evacuation_info) {
   HeapRegion* retained_region = _retained_old_gc_alloc_region;
   _retained_old_gc_alloc_region = NULL;
 
@@ -4388,7 +4435,7 @@
       !(retained_region->top() == retained_region->end()) &&
       !retained_region->is_empty() &&
       !retained_region->isHumongous()) {
-    retained_region->set_saved_mark();
+    retained_region->record_top_and_timestamp();
     // The retained region was added to the old region set when it was
     // retired. We have to remove it now, since we don't allow regions
     // we allocate to in the region sets. We'll re-add it later, when
@@ -4402,6 +4449,15 @@
   }
 }
 
+void G1CollectedHeap::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
+  assert_at_safepoint(true /* should_be_vm_thread */);
+
+  _survivor_gc_alloc_region.init();
+  _old_gc_alloc_region.init();
+
+  use_retained_old_gc_alloc_region(evacuation_info);
+}
+
 void G1CollectedHeap::release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info) {
   evacuation_info.set_allocation_regions(_survivor_gc_alloc_region.count() +
                                          _old_gc_alloc_region.count());
@@ -4593,127 +4649,7 @@
 }
 
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
-  ParGCAllocBuffer(gclab_word_size), _retired(false) { }
-
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp)
-  : _g1h(g1h),
-    _refs(g1h->task_queue(queue_num)),
-    _dcq(&g1h->dirty_card_queue_set()),
-    _ct_bs(g1h->g1_barrier_set()),
-    _g1_rem(g1h->g1_rem_set()),
-    _hash_seed(17), _queue_num(queue_num),
-    _term_attempts(0),
-    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
-    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
-    _age_table(false), _scanner(g1h, this, rp),
-    _strong_roots_time(0), _term_time(0),
-    _alloc_buffer_waste(0), _undo_waste(0) {
-  // we allocate G1YoungSurvRateNumRegions plus one entries, since
-  // we "sacrifice" entry 0 to keep track of surviving bytes for
-  // non-young regions (where the age is -1)
-  // We also add a few elements at the beginning and at the end in
-  // an attempt to eliminate cache contention
-  uint real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
-  uint array_length = PADDING_ELEM_NUM +
-                      real_length +
-                      PADDING_ELEM_NUM;
-  _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
-  if (_surviving_young_words_base == NULL)
-    vm_exit_out_of_memory(array_length * sizeof(size_t), OOM_MALLOC_ERROR,
-                          "Not enough space for young surv histo.");
-  _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
-  memset(_surviving_young_words, 0, (size_t) real_length * sizeof(size_t));
-
-  _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer;
-  _alloc_buffers[GCAllocForTenured]  = &_tenured_alloc_buffer;
-
-  _start = os::elapsedTime();
-}
-
-void
-G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st)
-{
-  st->print_raw_cr("GC Termination Stats");
-  st->print_raw_cr("     elapsed  --strong roots-- -------termination-------"
-                   " ------waste (KiB)------");
-  st->print_raw_cr("thr     ms        ms      %        ms      %    attempts"
-                   "  total   alloc    undo");
-  st->print_raw_cr("--- --------- --------- ------ --------- ------ --------"
-                   " ------- ------- -------");
-}
-
-void
-G1ParScanThreadState::print_termination_stats(int i,
-                                              outputStream* const st) const
-{
-  const double elapsed_ms = elapsed_time() * 1000.0;
-  const double s_roots_ms = strong_roots_time() * 1000.0;
-  const double term_ms    = term_time() * 1000.0;
-  st->print_cr("%3d %9.2f %9.2f %6.2f "
-               "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
-               SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
-               i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
-               term_ms, term_ms * 100 / elapsed_ms, term_attempts(),
-               (alloc_buffer_waste() + undo_waste()) * HeapWordSize / K,
-               alloc_buffer_waste() * HeapWordSize / K,
-               undo_waste() * HeapWordSize / K);
-}
-
-#ifdef ASSERT
-bool G1ParScanThreadState::verify_ref(narrowOop* ref) const {
-  assert(ref != NULL, "invariant");
-  assert(UseCompressedOops, "sanity");
-  assert(!has_partial_array_mask(ref), err_msg("ref=" PTR_FORMAT, ref));
-  oop p = oopDesc::load_decode_heap_oop(ref);
-  assert(_g1h->is_in_g1_reserved(p),
-         err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, ref, (void *)p));
-  return true;
-}
-
-bool G1ParScanThreadState::verify_ref(oop* ref) const {
-  assert(ref != NULL, "invariant");
-  if (has_partial_array_mask(ref)) {
-    // Must be in the collection set--it's already been copied.
-    oop p = clear_partial_array_mask(ref);
-    assert(_g1h->obj_in_cs(p),
-           err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, ref, (void *)p));
-  } else {
-    oop p = oopDesc::load_decode_heap_oop(ref);
-    assert(_g1h->is_in_g1_reserved(p),
-           err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, ref, (void *)p));
-  }
-  return true;
-}
-
-bool G1ParScanThreadState::verify_task(StarTask ref) const {
-  if (ref.is_narrow()) {
-    return verify_ref((narrowOop*) ref);
-  } else {
-    return verify_ref((oop*) ref);
-  }
-}
-#endif // ASSERT
-
-void G1ParScanThreadState::trim_queue() {
-  assert(_evac_failure_cl != NULL, "not set");
-
-  StarTask ref;
-  do {
-    // Drain the overflow stack first, so other threads can steal.
-    while (refs()->pop_overflow(ref)) {
-      deal_with_reference(ref);
-    }
-
-    while (refs()->pop_local(ref)) {
-      deal_with_reference(ref);
-    }
-  } while (!refs()->is_empty());
-}
-
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
-                                     G1ParScanThreadState* par_scan_state) :
-  _g1(g1), _par_scan_state(par_scan_state),
-  _worker_id(par_scan_state->queue_num()) { }
+  ParGCAllocBuffer(gclab_word_size), _retired(true) { }
 
 void G1ParCopyHelper::mark_object(oop obj) {
 #ifdef ASSERT
@@ -4748,107 +4684,6 @@
   _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id);
 }
 
-oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
-  size_t word_sz = old->size();
-  HeapRegion* from_region = _g1h->heap_region_containing_raw(old);
-  // +1 to make the -1 indexes valid...
-  int       young_index = from_region->young_index_in_cset()+1;
-  assert( (from_region->is_young() && young_index >  0) ||
-         (!from_region->is_young() && young_index == 0), "invariant" );
-  G1CollectorPolicy* g1p = _g1h->g1_policy();
-  markOop m = old->mark();
-  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
-                                           : m->age();
-  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
-                                                             word_sz);
-  HeapWord* obj_ptr = allocate(alloc_purpose, word_sz);
-#ifndef PRODUCT
-  // Should this evacuation fail?
-  if (_g1h->evacuation_should_fail()) {
-    if (obj_ptr != NULL) {
-      undo_allocation(alloc_purpose, obj_ptr, word_sz);
-      obj_ptr = NULL;
-    }
-  }
-#endif // !PRODUCT
-
-  if (obj_ptr == NULL) {
-    // This will either forward-to-self, or detect that someone else has
-    // installed a forwarding pointer.
-    return _g1h->handle_evacuation_failure_par(this, old);
-  }
-
-  oop obj = oop(obj_ptr);
-
-  // We're going to allocate linearly, so might as well prefetch ahead.
-  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
-
-  oop forward_ptr = old->forward_to_atomic(obj);
-  if (forward_ptr == NULL) {
-    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
-
-    // alloc_purpose is just a hint to allocate() above, recheck the type of region
-    // we actually allocated from and update alloc_purpose accordingly
-    HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr);
-    alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured;
-
-    if (g1p->track_object_age(alloc_purpose)) {
-      // We could simply do obj->incr_age(). However, this causes a
-      // performance issue. obj->incr_age() will first check whether
-      // the object has a displaced mark by checking its mark word;
-      // getting the mark word from the new location of the object
-      // stalls. So, given that we already have the mark word and we
-      // are about to install it anyway, it's better to increase the
-      // age on the mark word, when the object does not have a
-      // displaced mark word. We're not expecting many objects to have
-      // a displaced marked word, so that case is not optimized
-      // further (it could be...) and we simply call obj->incr_age().
-
-      if (m->has_displaced_mark_helper()) {
-        // in this case, we have to install the mark word first,
-        // otherwise obj looks to be forwarded (the old mark word,
-        // which contains the forward pointer, was copied)
-        obj->set_mark(m);
-        obj->incr_age();
-      } else {
-        m = m->incr_age();
-        obj->set_mark(m);
-      }
-      age_table()->add(obj, word_sz);
-    } else {
-      obj->set_mark(m);
-    }
-
-    if (G1StringDedup::is_enabled()) {
-      G1StringDedup::enqueue_from_evacuation(from_region->is_young(),
-                                             to_region->is_young(),
-                                             queue_num(),
-                                             obj);
-    }
-
-    size_t* surv_young_words = surviving_young_words();
-    surv_young_words[young_index] += word_sz;
-
-    if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
-      // We keep track of the next start index in the length field of
-      // the to-space object. The actual length can be found in the
-      // length field of the from-space object.
-      arrayOop(obj)->set_length(0);
-      oop* old_p = set_partial_array_mask(old);
-      push_on_queue(old_p);
-    } else {
-      // No point in using the slower heap_region_containing() method,
-      // given that we know obj is in the heap.
-      _scanner.set_region(_g1h->heap_region_containing_raw(obj));
-      obj->oop_iterate_backwards(&_scanner);
-    }
-  } else {
-    undo_allocation(alloc_purpose, obj_ptr, word_sz);
-    obj = forward_ptr;
-  }
-  return obj;
-}
-
 template <class T>
 void G1ParCopyHelper::do_klass_barrier(T* p, oop new_obj) {
   if (_g1->heap_region_containing_raw(new_obj)->is_young()) {
@@ -4856,7 +4691,7 @@
   }
 }
 
-template <G1Barrier barrier, bool do_mark_object>
+template <G1Barrier barrier, G1Mark do_mark_object>
 template <class T>
 void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
@@ -4869,7 +4704,9 @@
 
   assert(_worker_id == _par_scan_state->queue_num(), "sanity");
 
-  if (_g1->in_cset_fast_test(obj)) {
+  G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+
+  if (state == G1CollectedHeap::InCSet) {
     oop forwardee;
     if (obj->is_forwarded()) {
       forwardee = obj->forwardee();
@@ -4878,7 +4715,7 @@
     }
     assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
-    if (do_mark_object && forwardee != obj) {
+    if (do_mark_object != G1MarkNone && forwardee != obj) {
       // If the object is self-forwarded we don't need to explicitly
       // mark it, the evacuation failure protocol will do so.
       mark_forwarded_object(obj, forwardee);
@@ -4888,10 +4725,12 @@
       do_klass_barrier(p, forwardee);
     }
   } else {
+    if (state == G1CollectedHeap::IsHumongous) {
+      _g1->set_humongous_is_live(obj);
+    }
     // The object is not in collection set. If we're a root scanning
-    // closure during an initial mark pause (i.e. do_mark_object will
-    // be true) then attempt to mark the object.
-    if (do_mark_object) {
+    // closure during an initial mark pause then attempt to mark the object.
+    if (do_mark_object == G1MarkFromRoot) {
       mark_object(obj);
     }
   }
@@ -4901,8 +4740,8 @@
   }
 }
 
-template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(oop* p);
-template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(narrowOop* p);
+template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(oop* p);
+template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(narrowOop* p);
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
 protected:
@@ -4938,27 +4777,11 @@
 }
 
 void G1ParEvacuateFollowersClosure::do_void() {
-  StarTask stolen_task;
   G1ParScanThreadState* const pss = par_scan_state();
   pss->trim_queue();
-
   do {
-    while (queues()->steal(pss->queue_num(), pss->hash_seed(), stolen_task)) {
-      assert(pss->verify_task(stolen_task), "sanity");
-      if (stolen_task.is_narrow()) {
-        pss->deal_with_reference((narrowOop*) stolen_task);
-      } else {
-        pss->deal_with_reference((oop*) stolen_task);
-      }
-
-      // We've just processed a reference and we might have made
-      // available new entries on the queues. So we have to make sure
-      // we drain the queues as necessary.
-      pss->trim_queue();
-    }
+    pss->steal_and_trim_queue(queues());
   } while (!offer_termination());
-
-  pss->retire_alloc_buffers();
 }
 
 class G1KlassScanClosure : public KlassClosure {
@@ -4997,14 +4820,8 @@
   Mutex _stats_lock;
   Mutex* stats_lock() { return &_stats_lock; }
 
-  size_t getNCards() {
-    return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
-      / G1BlockOffsetSharedArray::N_bytes;
-  }
-
 public:
-  G1ParTask(G1CollectedHeap* g1h,
-            RefToScanQueueSet *task_queues)
+  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
@@ -5032,6 +4849,51 @@
     _n_workers = active_workers;
   }
 
+  // Helps out with CLD processing.
+  //
+  // During InitialMark we need to:
+  // 1) Scavenge all CLDs for the young GC.
+  // 2) Mark all objects directly reachable from strong CLDs.
+  template <G1Mark do_mark_object>
+  class G1CLDClosure : public CLDClosure {
+    G1ParCopyClosure<G1BarrierNone,  do_mark_object>* _oop_closure;
+    G1ParCopyClosure<G1BarrierKlass, do_mark_object>  _oop_in_klass_closure;
+    G1KlassScanClosure                                _klass_in_cld_closure;
+    bool                                              _claim;
+
+   public:
+    G1CLDClosure(G1ParCopyClosure<G1BarrierNone, do_mark_object>* oop_closure,
+                 bool only_young, bool claim)
+        : _oop_closure(oop_closure),
+          _oop_in_klass_closure(oop_closure->g1(),
+                                oop_closure->pss(),
+                                oop_closure->rp()),
+          _klass_in_cld_closure(&_oop_in_klass_closure, only_young),
+          _claim(claim) {
+
+    }
+
+    void do_cld(ClassLoaderData* cld) {
+      cld->oops_do(_oop_closure, &_klass_in_cld_closure, _claim);
+    }
+  };
+
+  class G1CodeBlobClosure: public CodeBlobClosure {
+    OopClosure* _f;
+
+   public:
+    G1CodeBlobClosure(OopClosure* f) : _f(f) {}
+    void do_code_blob(CodeBlob* blob) {
+      nmethod* that = blob->as_nmethod_or_null();
+      if (that != NULL) {
+        if (!that->test_set_oops_do_mark()) {
+          that->oops_do(_f);
+          that->fix_oop_relocations();
+        }
+      }
+    }
+  };
+
   void work(uint worker_id) {
     if (worker_id >= _n_workers) return;  // no work needed this round
 
@@ -5049,40 +4911,67 @@
 
       pss.set_evac_failure_closure(&evac_failure_cl);
 
-      G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
-      G1ParScanMetadataClosure       only_scan_metadata_cl(_g1h, &pss, rp);
-
-      G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
-      G1ParScanAndMarkMetadataClosure scan_mark_metadata_cl(_g1h, &pss, rp);
-
-      bool only_young                 = _g1h->g1_policy()->gcs_are_young();
-      G1KlassScanClosure              scan_mark_klasses_cl_s(&scan_mark_metadata_cl, false);
-      G1KlassScanClosure              only_scan_klasses_cl_s(&only_scan_metadata_cl, only_young);
-
-      OopClosure*                    scan_root_cl = &only_scan_root_cl;
-      G1KlassScanClosure*            scan_klasses_cl = &only_scan_klasses_cl_s;
+      bool only_young = _g1h->g1_policy()->gcs_are_young();
+
+      // Non-IM young GC.
+      G1ParCopyClosure<G1BarrierNone, G1MarkNone>             scan_only_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkNone>                                scan_only_cld_cl(&scan_only_root_cl,
+                                                                               only_young, // Only process dirty klasses.
+                                                                               false);     // No need to claim CLDs.
+      // IM young GC.
+      //    Strong roots closures.
+      G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot>         scan_mark_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkFromRoot>                            scan_mark_cld_cl(&scan_mark_root_cl,
+                                                                               false, // Process all klasses.
+                                                                               true); // Need to claim CLDs.
+      //    Weak roots closures.
+      G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkPromotedFromRoot>                    scan_mark_weak_cld_cl(&scan_mark_weak_root_cl,
+                                                                                    false, // Process all klasses.
+                                                                                    true); // Need to claim CLDs.
+
+      G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl);
+      G1CodeBlobClosure scan_mark_code_cl(&scan_mark_root_cl);
+      // IM Weak code roots are handled later.
+
+      OopClosure* strong_root_cl;
+      OopClosure* weak_root_cl;
+      CLDClosure* strong_cld_cl;
+      CLDClosure* weak_cld_cl;
+      CodeBlobClosure* strong_code_cl;
 
       if (_g1h->g1_policy()->during_initial_mark_pause()) {
         // We also need to mark copied objects.
-        scan_root_cl = &scan_mark_root_cl;
-        scan_klasses_cl = &scan_mark_klasses_cl_s;
+        strong_root_cl = &scan_mark_root_cl;
+        strong_cld_cl  = &scan_mark_cld_cl;
+        strong_code_cl = &scan_mark_code_cl;
+        if (ClassUnloadingWithConcurrentMark) {
+          weak_root_cl = &scan_mark_weak_root_cl;
+          weak_cld_cl  = &scan_mark_weak_cld_cl;
+        } else {
+          weak_root_cl = &scan_mark_root_cl;
+          weak_cld_cl  = &scan_mark_cld_cl;
+        }
+      } else {
+        strong_root_cl = &scan_only_root_cl;
+        weak_root_cl   = &scan_only_root_cl;
+        strong_cld_cl  = &scan_only_cld_cl;
+        weak_cld_cl    = &scan_only_cld_cl;
+        strong_code_cl = &scan_only_code_cl;
       }
 
-      G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
-
-      // Don't scan the scavengable methods in the code cache as part
-      // of strong root scanning. The code roots that point into a
-      // region in the collection set are scanned when we scan the
-      // region's RSet.
-      int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings;
+
+      G1ParPushHeapRSClosure  push_heap_rs_cl(_g1h, &pss);
 
       pss.start_strong_roots();
-      _g1h->g1_process_strong_roots(/* is scavenging */ true,
-                                    SharedHeap::ScanningOption(so),
-                                    scan_root_cl,
-                                    &push_heap_rs_cl,
-                                    scan_klasses_cl,
-                                    worker_id);
+      _g1h->g1_process_roots(strong_root_cl,
+                             weak_root_cl,
+                             &push_heap_rs_cl,
+                             strong_cld_cl,
+                             weak_cld_cl,
+                             strong_code_cl,
+                             worker_id);
+
       pss.end_strong_roots();
 
       {
@@ -5102,7 +4991,7 @@
         pss.print_termination_stats(worker_id);
       }
 
-      assert(pss.refs()->is_empty(), "should be empty");
+      assert(pss.queue_is_empty(), "should be empty");
 
       // Close the inner scope so that the ResourceMark and HandleMark
       // destructors are executed here and are included as part of the
@@ -5120,30 +5009,32 @@
 
 void
 G1CollectedHeap::
-g1_process_strong_roots(bool is_scavenging,
-                        ScanningOption so,
-                        OopClosure* scan_non_heap_roots,
-                        OopsInHeapRegionClosure* scan_rs,
-                        G1KlassScanClosure* scan_klasses,
-                        uint worker_i) {
-
-  // First scan the strong roots
+g1_process_roots(OopClosure* scan_non_heap_roots,
+                 OopClosure* scan_non_heap_weak_roots,
+                 OopsInHeapRegionClosure* scan_rs,
+                 CLDClosure* scan_strong_clds,
+                 CLDClosure* scan_weak_clds,
+                 CodeBlobClosure* scan_strong_code,
+                 uint worker_i) {
+
+  // First scan the shared roots.
   double ext_roots_start = os::elapsedTime();
   double closure_app_time_sec = 0.0;
 
+  bool during_im = _g1h->g1_policy()->during_initial_mark_pause();
+  bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark;
+
   BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
-
-  assert(so & SO_CodeCache || scan_rs != NULL, "must scan code roots somehow");
-  // Walk the code cache/strong code roots w/o buffering, because StarTask
-  // cannot handle unaligned oop locations.
-  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, true /* do_marking */);
-
-  process_strong_roots(false, // no scoping; this is parallel code
-                       is_scavenging, so,
-                       &buf_scan_non_heap_roots,
-                       &eager_scan_code_roots,
-                       scan_klasses
-                       );
+  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
+
+  process_roots(false, // no scoping; this is parallel code
+                SharedHeap::SO_None,
+                &buf_scan_non_heap_roots,
+                &buf_scan_non_heap_weak_roots,
+                scan_strong_clds,
+                // Unloading Initial Marks handle the weak CLDs separately.
+                (trace_metadata ? NULL : scan_weak_clds),
+                scan_strong_code);
 
   // Now the CM ref_processor roots.
   if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
@@ -5154,10 +5045,21 @@
     ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
   }
 
+  if (trace_metadata) {
+    // Barrier to make sure all workers passed
+    // the strong CLD and strong nmethods phases.
+    active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads());
+
+    // Now take the complement of the strong CLDs.
+    ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
+  }
+
   // Finish up any enqueued closure apps (attributed as object copy time).
   buf_scan_non_heap_roots.done();
-
-  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds();
+  buf_scan_non_heap_weak_roots.done();
+
+  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
+      + buf_scan_non_heap_weak_roots.closure_app_seconds();
 
   g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
 
@@ -5181,32 +5083,14 @@
   }
   g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
-  // If this is an initial mark pause, and we're not scanning
-  // the entire code cache, we need to mark the oops in the
-  // strong code root lists for the regions that are not in
-  // the collection set.
-  // Note all threads participate in this set of root tasks.
-  double mark_strong_code_roots_ms = 0.0;
-  if (g1_policy()->during_initial_mark_pause() && !(so & SO_CodeCache)) {
-    double mark_strong_roots_start = os::elapsedTime();
-    mark_strong_code_roots(worker_i);
-    mark_strong_code_roots_ms = (os::elapsedTime() - mark_strong_roots_start) * 1000.0;
-  }
-  g1_policy()->phase_times()->record_strong_code_root_mark_time(worker_i, mark_strong_code_roots_ms);
-
   // Now scan the complement of the collection set.
-  if (scan_rs != NULL) {
-    g1_rem_set()->oops_into_collection_set_do(scan_rs, &eager_scan_code_roots, worker_i);
-  }
+  MarkingCodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots, CodeBlobToOopClosure::FixRelocations);
+
+  g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
+
   _process_strong_tasks->all_tasks_completed();
 }
 
-void
-G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure) {
-  CodeBlobToOopClosure roots_in_blobs(root_closure, /*do_marking=*/ false);
-  SharedHeap::process_weak_roots(root_closure, &roots_in_blobs);
-}
-
 class G1StringSymbolTableUnlinkTask : public AbstractGangTask {
 private:
   BoolObjectClosure* _is_alive;
@@ -5224,7 +5108,8 @@
   bool _do_in_parallel;
 public:
   G1StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) :
-    AbstractGangTask("Par String/Symbol table unlink"), _is_alive(is_alive),
+    AbstractGangTask("String/Symbol Unlinking"),
+    _is_alive(is_alive),
     _do_in_parallel(G1CollectedHeap::use_parallel_gc_threads()),
     _process_strings(process_strings), _strings_processed(0), _strings_removed(0),
     _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) {
@@ -5246,6 +5131,14 @@
     guarantee(!_process_symbols || !_do_in_parallel || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size,
               err_msg("claim value "INT32_FORMAT" after unlink less than initial symbol table size "INT32_FORMAT,
                       SymbolTable::parallel_claimed_index(), _initial_symbol_table_size));
+
+    if (G1TraceStringSymbolTableScrubbing) {
+      gclog_or_tty->print_cr("Cleaned string and symbol table, "
+                             "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, "
+                             "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed",
+                             strings_processed(), strings_removed(),
+                             symbols_processed(), symbols_removed());
+    }
   }
 
   void work(uint worker_id) {
@@ -5281,12 +5174,279 @@
   size_t symbols_removed()   const { return (size_t)_symbols_removed; }
 };
 
-void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
-                                                     bool process_strings, bool process_symbols) {
+class G1CodeCacheUnloadingTask VALUE_OBJ_CLASS_SPEC {
+private:
+  static Monitor* _lock;
+
+  BoolObjectClosure* const _is_alive;
+  const bool               _unloading_occurred;
+  const uint               _num_workers;
+
+  // Variables used to claim nmethods.
+  nmethod* _first_nmethod;
+  volatile nmethod* _claimed_nmethod;
+
+  // The list of nmethods that need to be processed by the second pass.
+  volatile nmethod* _postponed_list;
+  volatile uint     _num_entered_barrier;
+
+ public:
+  G1CodeCacheUnloadingTask(uint num_workers, BoolObjectClosure* is_alive, bool unloading_occurred) :
+      _is_alive(is_alive),
+      _unloading_occurred(unloading_occurred),
+      _num_workers(num_workers),
+      _first_nmethod(NULL),
+      _claimed_nmethod(NULL),
+      _postponed_list(NULL),
+      _num_entered_barrier(0)
+  {
+    nmethod::increase_unloading_clock();
+    _first_nmethod = CodeCache::alive_nmethod(CodeCache::first());
+    _claimed_nmethod = (volatile nmethod*)_first_nmethod;
+  }
+
+  ~G1CodeCacheUnloadingTask() {
+    CodeCache::verify_clean_inline_caches();
+
+    CodeCache::set_needs_cache_clean(false);
+    guarantee(CodeCache::scavenge_root_nmethods() == NULL, "Must be");
+
+    CodeCache::verify_icholder_relocations();
+  }
+
+ private:
+  void add_to_postponed_list(nmethod* nm) {
+      nmethod* old;
+      do {
+        old = (nmethod*)_postponed_list;
+        nm->set_unloading_next(old);
+      } while ((nmethod*)Atomic::cmpxchg_ptr(nm, &_postponed_list, old) != old);
+  }
+
+  void clean_nmethod(nmethod* nm) {
+    bool postponed = nm->do_unloading_parallel(_is_alive, _unloading_occurred);
+
+    if (postponed) {
+      // This nmethod referred to an nmethod that has not been cleaned/unloaded yet.
+      add_to_postponed_list(nm);
+    }
+
+    // Mark that this nmethod has been cleaned/unloaded.
+    // After this call, it will be safe to ask if this nmethod was unloaded or not.
+    nm->set_unloading_clock(nmethod::global_unloading_clock());
+  }
+
+  void clean_nmethod_postponed(nmethod* nm) {
+    nm->do_unloading_parallel_postponed(_is_alive, _unloading_occurred);
+  }
+
+  static const int MaxClaimNmethods = 16;
+
+  void claim_nmethods(nmethod** claimed_nmethods, int *num_claimed_nmethods) {
+    nmethod* first;
+    nmethod* last;
+
+    do {
+      *num_claimed_nmethods = 0;
+
+      first = last = (nmethod*)_claimed_nmethod;
+
+      if (first != NULL) {
+        for (int i = 0; i < MaxClaimNmethods; i++) {
+          last = CodeCache::alive_nmethod(CodeCache::next(last));
+
+          if (last == NULL) {
+            break;
+          }
+
+          claimed_nmethods[i] = last;
+          (*num_claimed_nmethods)++;
+        }
+      }
+
+    } while ((nmethod*)Atomic::cmpxchg_ptr(last, &_claimed_nmethod, first) != first);
+  }
+
+  nmethod* claim_postponed_nmethod() {
+    nmethod* claim;
+    nmethod* next;
+
+    do {
+      claim = (nmethod*)_postponed_list;
+      if (claim == NULL) {
+        return NULL;
+      }
+
+      next = claim->unloading_next();
+
+    } while ((nmethod*)Atomic::cmpxchg_ptr(next, &_postponed_list, claim) != claim);
+
+    return claim;
+  }
+
+ public:
+  // Mark that we're done with the first pass of nmethod cleaning.
+  void barrier_mark(uint worker_id) {
+    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+    _num_entered_barrier++;
+    if (_num_entered_barrier == _num_workers) {
+      ml.notify_all();
+    }
+  }
+
+  // See if we have to wait for the other workers to
+  // finish their first-pass nmethod cleaning work.
+  void barrier_wait(uint worker_id) {
+    if (_num_entered_barrier < _num_workers) {
+      MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+      while (_num_entered_barrier < _num_workers) {
+          ml.wait(Mutex::_no_safepoint_check_flag, 0, false);
+      }
+    }
+  }
+
+  // Cleaning and unloading of nmethods. Some work has to be postponed
+  // to the second pass, when we know which nmethods survive.
+  void work_first_pass(uint worker_id) {
+    // The first nmethod is claimed by the first worker.
+    if (worker_id == 0 && _first_nmethod != NULL) {
+      clean_nmethod(_first_nmethod);
+      _first_nmethod = NULL;
+    }
+
+    int num_claimed_nmethods;
+    nmethod* claimed_nmethods[MaxClaimNmethods];
+
+    while (true) {
+      claim_nmethods(claimed_nmethods, &num_claimed_nmethods);
+
+      if (num_claimed_nmethods == 0) {
+        break;
+      }
+
+      for (int i = 0; i < num_claimed_nmethods; i++) {
+        clean_nmethod(claimed_nmethods[i]);
+      }
+    }
+  }
+
+  void work_second_pass(uint worker_id) {
+    nmethod* nm;
+    // Take care of postponed nmethods.
+    while ((nm = claim_postponed_nmethod()) != NULL) {
+      clean_nmethod_postponed(nm);
+    }
+  }
+};
+
+Monitor* G1CodeCacheUnloadingTask::_lock = new Monitor(Mutex::leaf, "Code Cache Unload lock");
+
+class G1KlassCleaningTask : public StackObj {
+  BoolObjectClosure*                      _is_alive;
+  volatile jint                           _clean_klass_tree_claimed;
+  ClassLoaderDataGraphKlassIteratorAtomic _klass_iterator;
+
+ public:
+  G1KlassCleaningTask(BoolObjectClosure* is_alive) :
+      _is_alive(is_alive),
+      _clean_klass_tree_claimed(0),
+      _klass_iterator() {
+  }
+
+ private:
+  bool claim_clean_klass_tree_task() {
+    if (_clean_klass_tree_claimed) {
+      return false;
+    }
+
+    return Atomic::cmpxchg(1, (jint*)&_clean_klass_tree_claimed, 0) == 0;
+  }
+
+  InstanceKlass* claim_next_klass() {
+    Klass* klass;
+    do {
+      klass =_klass_iterator.next_klass();
+    } while (klass != NULL && !klass->oop_is_instance());
+
+    return (InstanceKlass*)klass;
+  }
+
+public:
+
+  void clean_klass(InstanceKlass* ik) {
+    ik->clean_implementors_list(_is_alive);
+    ik->clean_method_data(_is_alive);
+
+    // G1 specific cleanup work that has
+    // been moved here to be done in parallel.
+    ik->clean_dependent_nmethods();
+  }
+
+  void work() {
+    ResourceMark rm;
+
+    // One worker will clean the subklass/sibling klass tree.
+    if (claim_clean_klass_tree_task()) {
+      Klass::clean_subklass_tree(_is_alive);
+    }
+
+    // All workers will help clean the classes.
+    InstanceKlass* klass;
+    while ((klass = claim_next_klass()) != NULL) {
+      clean_klass(klass);
+    }
+  }
+};
+
+// To minimize the remark pause times, the tasks below are done in parallel.
+class G1ParallelCleaningTask : public AbstractGangTask {
+private:
+  G1StringSymbolTableUnlinkTask _string_symbol_task;
+  G1CodeCacheUnloadingTask      _code_cache_task;
+  G1KlassCleaningTask           _klass_cleaning_task;
+
+public:
+  // The constructor is run in the VMThread.
+  G1ParallelCleaningTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, uint num_workers, bool unloading_occurred) :
+      AbstractGangTask("Parallel Cleaning"),
+      _string_symbol_task(is_alive, process_strings, process_symbols),
+      _code_cache_task(num_workers, is_alive, unloading_occurred),
+      _klass_cleaning_task(is_alive) {
+  }
+
+  // The parallel work done by all worker threads.
+  void work(uint worker_id) {
+    // Do first pass of code cache cleaning.
+    _code_cache_task.work_first_pass(worker_id);
+
+    // Let the threads mark that the first pass is done.
+    _code_cache_task.barrier_mark(worker_id);
+
+    // Clean the Strings and Symbols.
+    _string_symbol_task.work(worker_id);
+
+    // Wait for all workers to finish the first code cache cleaning pass.
+    _code_cache_task.barrier_wait(worker_id);
+
+    // Do the second code cache cleaning work, which relies on
+    // the liveness information gathered during the first pass.
+    _code_cache_task.work_second_pass(worker_id);
+
+    // Clean all klasses that were not unloaded.
+    _klass_cleaning_task.work();
+  }
+};
+
+
+void G1CollectedHeap::parallel_cleaning(BoolObjectClosure* is_alive,
+                                        bool process_strings,
+                                        bool process_symbols,
+                                        bool class_unloading_occurred) {
   uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                   _g1h->workers()->active_workers() : 1);
-
-  G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
+                    workers()->active_workers() : 1);
+
+  G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols,
+                                        n_workers, class_unloading_occurred);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     set_par_threads(n_workers);
     workers()->run_task(&g1_unlink_task);
@@ -5294,12 +5454,21 @@
   } else {
     g1_unlink_task.work(0);
   }
-  if (G1TraceStringSymbolTableScrubbing) {
-    gclog_or_tty->print_cr("Cleaned string and symbol table, "
-                           "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, "
-                           "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed",
-                           g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(),
-                           g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed());
+}
+
+void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
+                                                     bool process_strings, bool process_symbols) {
+  {
+    uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                     _g1h->workers()->active_workers() : 1);
+    G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      set_par_threads(n_workers);
+      workers()->run_task(&g1_unlink_task);
+      set_par_threads(0);
+    } else {
+      g1_unlink_task.work(0);
+    }
   }
 
   if (G1StringDedup::is_enabled()) {
@@ -5307,11 +5476,25 @@
   }
 }
 
-class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure {
-public:
-  bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
-    *card_ptr = CardTableModRefBS::dirty_card_val();
-    return true;
+class G1RedirtyLoggedCardsTask : public AbstractGangTask {
+ private:
+  DirtyCardQueueSet* _queue;
+ public:
+  G1RedirtyLoggedCardsTask(DirtyCardQueueSet* queue) : AbstractGangTask("Redirty Cards"), _queue(queue) { }
+
+  virtual void work(uint worker_id) {
+    double start_time = os::elapsedTime();
+
+    RedirtyLoggedCardTableEntryClosure cl;
+    if (G1CollectedHeap::heap()->use_parallel_gc_threads()) {
+      _queue->par_apply_closure_to_all_completed_buffers(&cl);
+    } else {
+      _queue->apply_closure_to_all_completed_buffers(&cl);
+    }
+
+    G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times();
+    timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0);
+    timer->record_redirty_logged_cards_processed_cards(worker_id, cl.num_processed());
   }
 };
 
@@ -5319,9 +5502,18 @@
   guarantee(G1DeferredRSUpdate, "Must only be called when using deferred RS updates.");
   double redirty_logged_cards_start = os::elapsedTime();
 
-  RedirtyLoggedCardTableEntryFastClosure redirty;
-  dirty_card_queue_set().set_closure(&redirty);
-  dirty_card_queue_set().apply_closure_to_all_completed_buffers();
+  uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                   _g1h->workers()->active_workers() : 1);
+
+  G1RedirtyLoggedCardsTask redirty_task(&dirty_card_queue_set());
+  dirty_card_queue_set().reset_for_par_iteration();
+  if (use_parallel_gc_threads()) {
+    set_par_threads(n_workers);
+    workers()->run_task(&redirty_task);
+    set_par_threads(0);
+  } else {
+    redirty_task.work(0);
+  }
 
   DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
   dcq.merge_bufferlists(&dirty_card_queue_set());
@@ -5360,12 +5552,21 @@
 public:
   G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
   void do_oop(narrowOop* p) { guarantee(false, "Not needed"); }
-  void do_oop(      oop* p) {
+  void do_oop(oop* p) {
     oop obj = *p;
 
-    if (_g1->obj_in_cs(obj)) {
+    G1CollectedHeap::in_cset_state_t cset_state = _g1->in_cset_state(obj);
+    if (obj == NULL || cset_state == G1CollectedHeap::InNeither) {
+      return;
+    }
+    if (cset_state == G1CollectedHeap::InCSet) {
       assert( obj->is_forwarded(), "invariant" );
       *p = obj->forwardee();
+    } else {
+      assert(!obj->is_forwarded(), "invariant" );
+      assert(cset_state == G1CollectedHeap::IsHumongous,
+             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state));
+      _g1->set_humongous_is_live(obj);
     }
   }
 };
@@ -5378,17 +5579,14 @@
 class G1CopyingKeepAliveClosure: public OopClosure {
   G1CollectedHeap*         _g1h;
   OopClosure*              _copy_non_heap_obj_cl;
-  OopsInHeapRegionClosure* _copy_metadata_obj_cl;
   G1ParScanThreadState*    _par_scan_state;
 
 public:
   G1CopyingKeepAliveClosure(G1CollectedHeap* g1h,
                             OopClosure* non_heap_obj_cl,
-                            OopsInHeapRegionClosure* metadata_obj_cl,
                             G1ParScanThreadState* pss):
     _g1h(g1h),
     _copy_non_heap_obj_cl(non_heap_obj_cl),
-    _copy_metadata_obj_cl(metadata_obj_cl),
     _par_scan_state(pss)
   {}
 
@@ -5398,7 +5596,7 @@
   template <class T> void do_oop_work(T* p) {
     oop obj = oopDesc::load_decode_heap_oop(p);
 
-    if (_g1h->obj_in_cs(obj)) {
+    if (_g1h->is_in_cset_or_humongous(obj)) {
       // If the referent object has been forwarded (either copied
       // to a new location or to itself in the event of an
       // evacuation failure) then we need to update the reference
@@ -5421,12 +5619,12 @@
         _par_scan_state->push_on_queue(p);
       } else {
         assert(!Metaspace::contains((const void*)p),
-               err_msg("Otherwise need to call _copy_metadata_obj_cl->do_oop(p) "
+               err_msg("Unexpectedly found a pointer from metadata: "
                               PTR_FORMAT, p));
-          _copy_non_heap_obj_cl->do_oop(p);
-        }
+        _copy_non_heap_obj_cl->do_oop(p);
       }
     }
+  }
 };
 
 // Serial drain queue closure. Called as the 'complete_gc'
@@ -5516,22 +5714,18 @@
     pss.set_evac_failure_closure(&evac_failure_cl);
 
     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
 
     G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(_g1h, &pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-    OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.
       copy_non_heap_cl = &copy_mark_non_heap_cl;
-      copy_metadata_cl = &copy_mark_metadata_cl;
     }
 
     // Keep alive closure.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_metadata_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
 
     // Complete GC closure
     G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator);
@@ -5622,22 +5816,17 @@
 
     pss.set_evac_failure_closure(&evac_failure_cl);
 
-    assert(pss.refs()->is_empty(), "both queue and overflow should be empty");
-
+    assert(pss.queue_is_empty(), "both queue and overflow should be empty");
 
     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
 
     G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(_g1h, &pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-    OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.
       copy_non_heap_cl = &copy_mark_non_heap_cl;
-      copy_metadata_cl = &copy_mark_metadata_cl;
     }
 
     // Is alive closure
@@ -5645,7 +5834,7 @@
 
     // Copying keep alive closure. Applied to referent objects that need
     // to be copied.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_metadata_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
 
     ReferenceProcessor* rp = _g1h->ref_processor_cm();
 
@@ -5681,7 +5870,7 @@
     G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _queues, &_terminator);
     drain_queue.do_void();
     // Allocation buffers were retired at the end of G1ParEvacuateFollowersClosure
-    assert(pss.refs()->is_empty(), "should be");
+    assert(pss.queue_is_empty(), "should be");
   }
 };
 
@@ -5748,25 +5937,21 @@
 
   pss.set_evac_failure_closure(&evac_failure_cl);
 
-  assert(pss.refs()->is_empty(), "pre-condition");
+  assert(pss.queue_is_empty(), "pre-condition");
 
   G1ParScanExtRootClosure        only_copy_non_heap_cl(this, &pss, NULL);
-  G1ParScanMetadataClosure       only_copy_metadata_cl(this, &pss, NULL);
 
   G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL);
-  G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(this, &pss, NULL);
 
   OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-  OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
   if (_g1h->g1_policy()->during_initial_mark_pause()) {
     // We also need to mark copied objects.
     copy_non_heap_cl = &copy_mark_non_heap_cl;
-    copy_metadata_cl = &copy_mark_metadata_cl;
   }
 
   // Keep alive closure.
-  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, copy_metadata_cl, &pss);
+  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, &pss);
 
   // Serial Complete GC closure
   G1STWDrainQueueClosure drain_queue(this, &pss);
@@ -5781,7 +5966,8 @@
                                               &keep_alive,
                                               &drain_queue,
                                               NULL,
-                                              _gc_timer_stw);
+                                              _gc_timer_stw,
+                                              _gc_tracer_stw->gc_id());
   } else {
     // Parallel reference processing
     assert(rp->num_q() == no_of_gc_workers, "sanity");
@@ -5792,15 +5978,14 @@
                                               &keep_alive,
                                               &drain_queue,
                                               &par_task_executor,
-                                              _gc_timer_stw);
+                                              _gc_timer_stw,
+                                              _gc_tracer_stw->gc_id());
   }
 
   _gc_tracer_stw->report_gc_reference_stats(stats);
-  // We have completed copying any necessary live referent objects
-  // (that were not copied during the actual pause) so we can
-  // retire any active alloc buffers
-  pss.retire_alloc_buffers();
-  assert(pss.refs()->is_empty(), "both queue and overflow should be empty");
+
+  // We have completed copying any necessary live referent objects.
+  assert(pss.queue_is_empty(), "both queue and overflow should be empty");
 
   double ref_proc_time = os::elapsedTime() - ref_proc_start;
   g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0);
@@ -5885,6 +6070,10 @@
 
   {
     StrongRootsScope srs(this);
+    // InitialMark needs claim bits to keep track of the marked-through CLDs.
+    if (g1_policy()->during_initial_mark_pause()) {
+      ClassLoaderDataGraph::clear_claimed_marks();
+    }
 
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       // The individual threads will set their evac-failure closures.
@@ -5989,6 +6178,11 @@
   assert(!hr->is_empty(), "the region should not be empty");
   assert(free_list != NULL, "pre-condition");
 
+  if (G1VerifyBitmaps) {
+    MemRegion mr(hr->bottom(), hr->end());
+    concurrent_mark()->clearRangePrevBitmap(mr);
+  }
+
   // Clear the card counts for this region.
   // Note: we only need to do this if the region is not young
   // (since we don't refine cards in young regions).
@@ -6123,7 +6317,87 @@
 void G1CollectedHeap::verify_dirty_young_regions() {
   verify_dirty_young_list(_young_list->first_region());
 }
-#endif
+
+bool G1CollectedHeap::verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+                                               HeapWord* tams, HeapWord* end) {
+  guarantee(tams <= end,
+            err_msg("tams: "PTR_FORMAT" end: "PTR_FORMAT, tams, end));
+  HeapWord* result = bitmap->getNextMarkedWordAddress(tams, end);
+  if (result < end) {
+    gclog_or_tty->cr();
+    gclog_or_tty->print_cr("## wrong marked address on %s bitmap: "PTR_FORMAT,
+                           bitmap_name, result);
+    gclog_or_tty->print_cr("## %s tams: "PTR_FORMAT" end: "PTR_FORMAT,
+                           bitmap_name, tams, end);
+    return false;
+  }
+  return true;
+}
+
+bool G1CollectedHeap::verify_bitmaps(const char* caller, HeapRegion* hr) {
+  CMBitMapRO* prev_bitmap = concurrent_mark()->prevMarkBitMap();
+  CMBitMapRO* next_bitmap = (CMBitMapRO*) concurrent_mark()->nextMarkBitMap();
+
+  HeapWord* bottom = hr->bottom();
+  HeapWord* ptams  = hr->prev_top_at_mark_start();
+  HeapWord* ntams  = hr->next_top_at_mark_start();
+  HeapWord* end    = hr->end();
+
+  bool res_p = verify_no_bits_over_tams("prev", prev_bitmap, ptams, end);
+
+  bool res_n = true;
+  // We reset mark_in_progress() before we reset _cmThread->in_progress() and in this window
+  // we do the clearing of the next bitmap concurrently. Thus, we can not verify the bitmap
+  // if we happen to be in that state.
+  if (mark_in_progress() || !_cmThread->in_progress()) {
+    res_n = verify_no_bits_over_tams("next", next_bitmap, ntams, end);
+  }
+  if (!res_p || !res_n) {
+    gclog_or_tty->print_cr("#### Bitmap verification failed for "HR_FORMAT,
+                           HR_FORMAT_PARAMS(hr));
+    gclog_or_tty->print_cr("#### Caller: %s", caller);
+    return false;
+  }
+  return true;
+}
+
+void G1CollectedHeap::check_bitmaps(const char* caller, HeapRegion* hr) {
+  if (!G1VerifyBitmaps) return;
+
+  guarantee(verify_bitmaps(caller, hr), "bitmap verification");
+}
+
+class G1VerifyBitmapClosure : public HeapRegionClosure {
+private:
+  const char* _caller;
+  G1CollectedHeap* _g1h;
+  bool _failures;
+
+public:
+  G1VerifyBitmapClosure(const char* caller, G1CollectedHeap* g1h) :
+    _caller(caller), _g1h(g1h), _failures(false) { }
+
+  bool failures() { return _failures; }
+
+  virtual bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) return false;
+
+    bool result = _g1h->verify_bitmaps(_caller, hr);
+    if (!result) {
+      _failures = true;
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::check_bitmaps(const char* caller) {
+  if (!G1VerifyBitmaps) return;
+
+  G1VerifyBitmapClosure cl(caller, this);
+  heap_region_iterate(&cl);
+  guarantee(!cl.failures(), "bitmap verification");
+}
+#endif // PRODUCT
 
 void G1CollectedHeap::cleanUpCardTable() {
   G1SATBCardTableModRefBS* ct_bs = g1_barrier_set();
@@ -6272,6 +6546,154 @@
   policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }
 
+class G1FreeHumongousRegionClosure : public HeapRegionClosure {
+ private:
+  FreeRegionList* _free_region_list;
+  HeapRegionSet* _proxy_set;
+  HeapRegionSetCount _humongous_regions_removed;
+  size_t _freed_bytes;
+ public:
+
+  G1FreeHumongousRegionClosure(FreeRegionList* free_region_list) :
+    _free_region_list(free_region_list), _humongous_regions_removed(), _freed_bytes(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    oop obj = (oop)r->bottom();
+    CMBitMap* next_bitmap = g1h->concurrent_mark()->nextMarkBitMap();
+
+    // The following checks for whether the humongous object is live are sufficient.
+    // The main additional check (in addition to having a reference from the roots
+    // or the young gen) is whether the humongous object has a remembered set entry.
+    //
+    // A humongous object cannot be live if there is no remembered set for it
+    // because:
+    // - there can be no references from within humongous starts regions referencing
+    // the object because we never allocate other objects into them.
+    // (I.e. there are no intra-region references that may be missed by the
+    // remembered set)
+    // - as soon as there is a remembered set entry to the humongous starts region
+    // (i.e. it has "escaped" to an old object) this remembered set entry will stay
+    // until the end of a concurrent mark.
+    //
+    // It is not required to check whether the object has been found dead by marking
+    // or not, in fact it would prevent reclamation within a concurrent cycle, as
+    // all objects allocated during that time are considered live.
+    // SATB marking is even more conservative than the remembered set.
+    // So if at this point in the collection there is no remembered set entry,
+    // nobody has a reference to it.
+    // At the start of collection we flush all refinement logs, and remembered sets
+    // are completely up-to-date with respect to references to the humongous object.
+    //
+    // Other implementation considerations:
+    // - never consider object arrays: while they are a valid target, they have not
+    // been observed to be used as temporary objects.
+    // - they would also pose considerable effort for cleaning up the remembered
+    // sets.
+    // While this cleanup is not strictly necessary to be done (or done instantly),
+    // given that their occurrence is very low, this saves us this additional
+    // complexity.
+    uint region_idx = r->hrs_index();
+    if (g1h->humongous_is_live(region_idx) ||
+        g1h->humongous_region_is_always_live(region_idx)) {
+
+      if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+        gclog_or_tty->print_cr("Live humongous %d region %d with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                               r->isHumongous(),
+                               region_idx,
+                               r->rem_set()->occupied(),
+                               r->rem_set()->strong_code_roots_list_length(),
+                               next_bitmap->isMarked(r->bottom()),
+                               g1h->humongous_is_live(region_idx),
+                               obj->is_objArray()
+                              );
+      }
+
+      return false;
+    }
+
+    guarantee(!obj->is_objArray(),
+              err_msg("Eagerly reclaiming object arrays is not supported, but the object "PTR_FORMAT" is.",
+                      r->bottom()));
+
+    if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+      gclog_or_tty->print_cr("Reclaim humongous region %d start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                             r->isHumongous(),
+                             r->bottom(),
+                             region_idx,
+                             r->region_num(),
+                             r->rem_set()->occupied(),
+                             r->rem_set()->strong_code_roots_list_length(),
+                             next_bitmap->isMarked(r->bottom()),
+                             g1h->humongous_is_live(region_idx),
+                             obj->is_objArray()
+                            );
+    }
+    // Need to clear mark bit of the humongous object if already set.
+    if (next_bitmap->isMarked(r->bottom())) {
+      next_bitmap->clear(r->bottom());
+    }
+    _freed_bytes += r->used();
+    r->set_containing_set(NULL);
+    _humongous_regions_removed.increment(1u, r->capacity());
+    g1h->free_humongous_region(r, _free_region_list, false);
+
+    return false;
+  }
+
+  HeapRegionSetCount& humongous_free_count() {
+    return _humongous_regions_removed;
+  }
+
+  size_t bytes_freed() const {
+    return _freed_bytes;
+  }
+
+  size_t humongous_reclaimed() const {
+    return _humongous_regions_removed.length();
+  }
+};
+
+void G1CollectedHeap::eagerly_reclaim_humongous_regions() {
+  assert_at_safepoint(true);
+
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC || !_has_humongous_reclaim_candidates) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms(0.0, 0);
+    return;
+  }
+
+  double start_time = os::elapsedTime();
+
+  FreeRegionList local_cleanup_list("Local Humongous Cleanup List");
+
+  G1FreeHumongousRegionClosure cl(&local_cleanup_list);
+  heap_region_iterate(&cl);
+
+  HeapRegionSetCount empty_set;
+  remove_from_old_sets(empty_set, cl.humongous_free_count());
+
+  G1HRPrinter* hr_printer = _g1h->hr_printer();
+  if (hr_printer->is_active()) {
+    FreeRegionListIterator iter(&local_cleanup_list);
+    while (iter.more_available()) {
+      HeapRegion* hr = iter.get_next();
+      hr_printer->cleanup(hr);
+    }
+  }
+
+  prepend_to_freelist(&local_cleanup_list);
+  decrement_summary_bytes(cl.bytes_freed());
+
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms((os::elapsedTime() - start_time) * 1000.0,
+                                                                    cl.humongous_reclaimed());
+}
+
 // This routine is similar to the above but does not record
 // any policy statistics or update free lists; we are abandoning
 // the current incremental collection set in preparation of a
@@ -6512,6 +6934,7 @@
     if (new_alloc_region != NULL) {
       set_region_short_lived_locked(new_alloc_region);
       _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full);
+      check_bitmaps("Mutator Region Allocation", new_alloc_region);
       return new_alloc_region;
     }
   }
@@ -6574,12 +6997,14 @@
       // We really only need to do this for old regions given that we
       // should never scan survivors. But it doesn't hurt to do it
       // for survivors too.
-      new_alloc_region->set_saved_mark();
+      new_alloc_region->record_top_and_timestamp();
       if (survivor) {
         new_alloc_region->set_survivor();
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor);
+        check_bitmaps("Survivor Region Allocation", new_alloc_region);
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
+        check_bitmaps("Old Region Allocation", new_alloc_region);
       }
       bool during_im = g1_policy()->during_initial_mark_pause();
       new_alloc_region->note_start_of_copying(during_im);
@@ -6836,106 +7261,6 @@
   g1_policy()->phase_times()->record_strong_code_root_purge_time(purge_time_ms);
 }
 
-// Mark all the code roots that point into regions *not* in the
-// collection set.
-//
-// Note we do not want to use a "marking" CodeBlobToOopClosure while
-// walking the the code roots lists of regions not in the collection
-// set. Suppose we have an nmethod (M) that points to objects in two
-// separate regions - one in the collection set (R1) and one not (R2).
-// Using a "marking" CodeBlobToOopClosure here would result in "marking"
-// nmethod M when walking the code roots for R1. When we come to scan
-// the code roots for R2, we would see that M is already marked and it
-// would be skipped and the objects in R2 that are referenced from M
-// would not be evacuated.
-
-class MarkStrongCodeRootCodeBlobClosure: public CodeBlobClosure {
-
-  class MarkStrongCodeRootOopClosure: public OopClosure {
-    ConcurrentMark* _cm;
-    HeapRegion* _hr;
-    uint _worker_id;
-
-    template <class T> void do_oop_work(T* p) {
-      T heap_oop = oopDesc::load_heap_oop(p);
-      if (!oopDesc::is_null(heap_oop)) {
-        oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-        // Only mark objects in the region (which is assumed
-        // to be not in the collection set).
-        if (_hr->is_in(obj)) {
-          _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
-        }
-      }
-    }
-
-  public:
-    MarkStrongCodeRootOopClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id) :
-      _cm(cm), _hr(hr), _worker_id(worker_id) {
-      assert(!_hr->in_collection_set(), "sanity");
-    }
-
-    void do_oop(narrowOop* p) { do_oop_work(p); }
-    void do_oop(oop* p)       { do_oop_work(p); }
-  };
-
-  MarkStrongCodeRootOopClosure _oop_cl;
-
-public:
-  MarkStrongCodeRootCodeBlobClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id):
-    _oop_cl(cm, hr, worker_id) {}
-
-  void do_code_blob(CodeBlob* cb) {
-    nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null();
-    if (nm != NULL) {
-      nm->oops_do(&_oop_cl);
-    }
-  }
-};
-
-class MarkStrongCodeRootsHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  uint _worker_id;
-
-public:
-  MarkStrongCodeRootsHRClosure(G1CollectedHeap* g1h, uint worker_id) :
-    _g1h(g1h), _worker_id(worker_id) {}
-
-  bool doHeapRegion(HeapRegion *hr) {
-    HeapRegionRemSet* hrrs = hr->rem_set();
-    if (hr->continuesHumongous()) {
-      // Code roots should never be attached to a continuation of a humongous region
-      assert(hrrs->strong_code_roots_list_length() == 0,
-             err_msg("code roots should never be attached to continuations of humongous region "HR_FORMAT
-                     " starting at "HR_FORMAT", but has "SIZE_FORMAT,
-                     HR_FORMAT_PARAMS(hr), HR_FORMAT_PARAMS(hr->humongous_start_region()),
-                     hrrs->strong_code_roots_list_length()));
-      return false;
-    }
-
-    if (hr->in_collection_set()) {
-      // Don't mark code roots into regions in the collection set here.
-      // They will be marked when we scan them.
-      return false;
-    }
-
-    MarkStrongCodeRootCodeBlobClosure cb_cl(_g1h->concurrent_mark(), hr, _worker_id);
-    hr->strong_code_roots_do(&cb_cl);
-    return false;
-  }
-};
-
-void G1CollectedHeap::mark_strong_code_roots(uint worker_id) {
-  MarkStrongCodeRootsHRClosure cl(this, worker_id);
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    heap_region_par_iterate_chunked(&cl,
-                                    worker_id,
-                                    workers()->active_workers(),
-                                    HeapRegion::ParMarkRootClaimValue);
-  } else {
-    heap_region_iterate(&cl);
-  }
-}
-
 class RebuildStrongCodeRootClosure: public CodeBlobClosure {
   G1CollectedHeap* _g1h;
 
@@ -6949,7 +7274,7 @@
       return;
     }
 
-    if (ScavengeRootsInCode && nm->detect_scavenge_root_oops()) {
+    if (ScavengeRootsInCode) {
       _g1h->register_nmethod(nm);
     }
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,9 +28,9 @@
 #include "gc_implementation/g1/concurrentMark.hpp"
 #include "gc_implementation/g1/evacuationInfo.hpp"
 #include "gc_implementation/g1/g1AllocRegion.hpp"
+#include "gc_implementation/g1/g1BiasedArray.hpp"
 #include "gc_implementation/g1/g1HRPrinter.hpp"
 #include "gc_implementation/g1/g1MonitoringSupport.hpp"
-#include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegionSeq.hpp"
@@ -200,6 +200,7 @@
 class RefineCardTableEntryClosure;
 
 class G1CollectedHeap : public SharedHeap {
+  friend class VM_CollectForMetadataAllocation;
   friend class VM_G1CollectForAllocation;
   friend class VM_G1CollectFull;
   friend class VM_G1IncCollectionPause;
@@ -209,7 +210,7 @@
   friend class OldGCAllocRegion;
 
   // Closures used in implementation.
-  template <G1Barrier barrier, bool do_mark_object>
+  template <G1Barrier barrier, G1Mark do_mark_object>
   friend class G1ParCopyClosure;
   friend class G1IsAliveClosure;
   friend class G1EvacuateFollowersClosure;
@@ -226,6 +227,7 @@
   friend class EvacPopObjClosure;
   friend class G1ParCleanupCTTask;
 
+  friend class G1FreeHumongousRegionClosure;
   // Other related classes.
   friend class G1MarkSweep;
 
@@ -256,6 +258,9 @@
   // It keeps track of the humongous regions.
   HeapRegionSet _humongous_set;
 
+  void clear_humongous_is_live_table();
+  void eagerly_reclaim_humongous_regions();
+
   // The number of regions we could create by expansion.
   uint _expansion_regions;
 
@@ -336,6 +341,9 @@
   // It initializes the GC alloc regions at the start of a GC.
   void init_gc_alloc_regions(EvacuationInfo& evacuation_info);
 
+  // Set up the retained old gc alloc region as the current old gc alloc region.
+  void use_retained_old_gc_alloc_region(EvacuationInfo& evacuation_info);
+
   // It releases the GC alloc regions at the end of a GC.
   void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info);
 
@@ -353,26 +361,25 @@
   // than the current allocation region.
   size_t _summary_bytes_used;
 
-  // This is used for a quick test on whether a reference points into
-  // the collection set or not. Basically, we have an array, with one
-  // byte per region, and that byte denotes whether the corresponding
-  // region is in the collection set or not. The entry corresponding
-  // the bottom of the heap, i.e., region 0, is pointed to by
-  // _in_cset_fast_test_base.  The _in_cset_fast_test field has been
-  // biased so that it actually points to address 0 of the address
-  // space, to make the test as fast as possible (we can simply shift
-  // the address to address into it, instead of having to subtract the
-  // bottom of the heap from the address before shifting it; basically
-  // it works in the same way the card table works).
-  bool* _in_cset_fast_test;
+  // Records whether the region at the given index is kept live by roots or
+  // references from the young generation.
+  class HumongousIsLiveBiasedMappedArray : public G1BiasedMappedArray<bool> {
+   protected:
+    bool default_value() const { return false; }
+   public:
+    void clear() { G1BiasedMappedArray<bool>::clear(); }
+    void set_live(uint region) {
+      set_by_index(region, true);
+    }
+    bool is_live(uint region) {
+      return get_by_index(region);
+    }
+  };
 
-  // The allocated array used for the fast test on whether a reference
-  // points into the collection set or not. This field is also used to
-  // free the array.
-  bool* _in_cset_fast_test_base;
-
-  // The length of the _in_cset_fast_test_base array.
-  uint _in_cset_fast_test_length;
+  HumongousIsLiveBiasedMappedArray _humongous_is_live;
+  // Stores whether during humongous object registration we found candidate regions.
+  // If not, we can skip a few steps.
+  bool _has_humongous_reclaim_candidates;
 
   volatile unsigned _gc_time_stamp;
 
@@ -692,15 +699,24 @@
   virtual void gc_prologue(bool full);
   virtual void gc_epilogue(bool full);
 
+  inline void set_humongous_is_live(oop obj);
+
+  bool humongous_is_live(uint region) {
+    return _humongous_is_live.is_live(region);
+  }
+
+  // Returns whether the given region (which must be a humongous (start) region)
+  // is to be considered conservatively live regardless of any other conditions.
+  bool humongous_region_is_always_live(uint index);
+  // Register the given region to be part of the collection set.
+  inline void register_humongous_region_with_in_cset_fast_test(uint index);
+  // Register regions with humongous objects (actually on the start region) in
+  // the in_cset_fast_test table.
+  void register_humongous_regions_with_in_cset_fast_test();
   // We register a region with the fast "in collection set" test. We
   // simply set to true the array slot corresponding to this region.
   void register_region_with_in_cset_fast_test(HeapRegion* r) {
-    assert(_in_cset_fast_test_base != NULL, "sanity");
-    assert(r->in_collection_set(), "invariant");
-    uint index = r->hrs_index();
-    assert(index < _in_cset_fast_test_length, "invariant");
-    assert(!_in_cset_fast_test_base[index], "invariant");
-    _in_cset_fast_test_base[index] = true;
+    _in_cset_fast_test.set_in_cset(r->hrs_index());
   }
 
   // This is a fast test on whether a reference points into the
@@ -709,9 +725,7 @@
   inline bool in_cset_fast_test(oop obj);
 
   void clear_cset_fast_test() {
-    assert(_in_cset_fast_test_base != NULL, "sanity");
-    memset(_in_cset_fast_test_base, false,
-           (size_t) _in_cset_fast_test_length * sizeof(bool));
+    _in_cset_fast_test.clear();
   }
 
   // This is called at the start of either a concurrent cycle or a Full
@@ -840,17 +854,13 @@
   // param is for use with parallel roots processing, and should be
   // the "i" of the calling parallel worker thread's work(i) function.
   // In the sequential case this param will be ignored.
-  void g1_process_strong_roots(bool is_scavenging,
-                               ScanningOption so,
-                               OopClosure* scan_non_heap_roots,
-                               OopsInHeapRegionClosure* scan_rs,
-                               G1KlassScanClosure* scan_klasses,
-                               uint worker_i);
-
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table, and referents of reachable weak refs.
-  void g1_process_weak_roots(OopClosure* root_closure);
+  void g1_process_roots(OopClosure* scan_non_heap_roots,
+                        OopClosure* scan_non_heap_weak_roots,
+                        OopsInHeapRegionClosure* scan_rs,
+                        CLDClosure* scan_strong_clds,
+                        CLDClosure* scan_weak_clds,
+                        CodeBlobClosure* scan_strong_code,
+                        uint worker_i);
 
   // Notifies all the necessary spaces that the committed space has
   // been updated (either expanded or shrunk). It should be called
@@ -1043,7 +1053,7 @@
   // of G1CollectedHeap::_gc_time_stamp.
   unsigned int* _worker_cset_start_region_time_stamp;
 
-  enum G1H_process_strong_roots_tasks {
+  enum G1H_process_roots_tasks {
     G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
@@ -1124,20 +1134,11 @@
     return _gc_time_stamp;
   }
 
-  void reset_gc_time_stamp() {
-    _gc_time_stamp = 0;
-    OrderAccess::fence();
-    // Clear the cached CSet starting regions and time stamps.
-    // Their validity is dependent on the GC timestamp.
-    clear_cset_start_regions();
-  }
+  inline void reset_gc_time_stamp();
 
   void check_gc_time_stamps() PRODUCT_RETURN;
 
-  void increment_gc_time_stamp() {
-    ++_gc_time_stamp;
-    OrderAccess::fence();
-  }
+  inline void increment_gc_time_stamp();
 
   // Reset the given region's GC timestamp. If it's starts humongous,
   // also reset the GC timestamp of its corresponding
@@ -1189,19 +1190,19 @@
   }
 
   // The total number of regions in the heap.
-  uint n_regions() { return _hrs.length(); }
+  uint n_regions() const { return _hrs.length(); }
 
   // The max number of regions in the heap.
-  uint max_regions() { return _hrs.max_length(); }
+  uint max_regions() const { return _hrs.max_length(); }
 
   // The number of regions that are completely free.
-  uint free_regions() { return _free_list.length(); }
+  uint free_regions() const { return _free_list.length(); }
 
   // The number of regions that are not completely free.
-  uint used_regions() { return n_regions() - free_regions(); }
+  uint used_regions() const { return n_regions() - free_regions(); }
 
   // The number of regions available for "regular" expansion.
-  uint expansion_regions() { return _expansion_regions; }
+  uint expansion_regions() const { return _expansion_regions; }
 
   // Factory method for HeapRegion instances. It will return NULL if
   // the allocation fails.
@@ -1212,6 +1213,30 @@
   void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
   void verify_dirty_young_regions() PRODUCT_RETURN;
 
+#ifndef PRODUCT
+  // Make sure that the given bitmap has no marked objects in the
+  // range [from,limit). If it does, print an error message and return
+  // false. Otherwise, just return true. bitmap_name should be "prev"
+  // or "next".
+  bool verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+                                HeapWord* from, HeapWord* limit);
+
+  // Verify that the prev / next bitmap range [tams,end) for the given
+  // region has no marks. Return true if all is well, false if errors
+  // are detected.
+  bool verify_bitmaps(const char* caller, HeapRegion* hr);
+#endif // PRODUCT
+
+  // If G1VerifyBitmaps is set, verify that the marking bitmaps for
+  // the given region do not have any spurious marks. If errors are
+  // detected, print appropriate error messages and crash.
+  void check_bitmaps(const char* caller, HeapRegion* hr) PRODUCT_RETURN;
+
+  // If G1VerifyBitmaps is set, verify that the marking bitmaps do not
+  // have any spurious marks. If errors are detected, print
+  // appropriate error messages and crash.
+  void check_bitmaps(const char* caller) PRODUCT_RETURN;
+
   // verify_region_sets() performs verification over the region
   // lists. It will be compiled in the product code to be used when
   // necessary (i.e., during heap verification).
@@ -1290,9 +1315,61 @@
   virtual bool is_in(const void* p) const;
 
   // Return "TRUE" iff the given object address is within the collection
-  // set.
+  // set. Slow implementation.
   inline bool obj_in_cs(oop obj);
 
+  inline bool is_in_cset(oop obj);
+
+  inline bool is_in_cset_or_humongous(const oop obj);
+
+  enum in_cset_state_t {
+   InNeither,           // neither in collection set nor humongous
+   InCSet,              // region is in collection set only
+   IsHumongous          // region is a humongous start region
+  };
+ private:
+  // Instances of this class are used for quick tests on whether a reference points
+  // into the collection set or is a humongous object (points into a humongous
+  // object).
+  // Each of the array's elements denotes whether the corresponding region is in
+  // the collection set or a humongous region.
+  // We use this to quickly reclaim humongous objects: by making a humongous region
+  // succeed this test, we sort-of add it to the collection set. During the reference
+  // iteration closures, when we see a humongous region, we simply mark it as
+  // referenced, i.e. live.
+  class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> {
+   protected:
+    char default_value() const { return G1CollectedHeap::InNeither; }
+   public:
+    void set_humongous(uintptr_t index) {
+      assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values");
+      set_by_index(index, G1CollectedHeap::IsHumongous);
+    }
+
+    void clear_humongous(uintptr_t index) {
+      set_by_index(index, G1CollectedHeap::InNeither);
+    }
+
+    void set_in_cset(uintptr_t index) {
+      assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value");
+      set_by_index(index, G1CollectedHeap::InCSet);
+    }
+
+    bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; }
+    bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; }
+    G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); }
+    void clear() { G1BiasedMappedArray<char>::clear(); }
+  };
+
+  // This array is used for a quick test on whether a reference points into
+  // the collection set or not. Each of the array's elements denotes whether the
+  // corresponding region is in the collection set or not.
+  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+
+ public:
+
+  inline in_cset_state_t in_cset_state(const oop obj);
+
   // Return "TRUE" iff the given object address is in the reserved
   // region of g1.
   bool is_in_g1_reserved(const void* p) const {
@@ -1327,9 +1404,6 @@
   // "cl.do_oop" on each.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to a memory region.
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // Iterate over all objects, calling "cl.do_object" on each.
   virtual void object_iterate(ObjectClosure* cl);
 
@@ -1347,6 +1421,10 @@
   // Return the region with the given index. It assumes the index is valid.
   inline HeapRegion* region_at(uint index) const;
 
+  // Calculate the region index of the given address. Given address must be
+  // within the heap.
+  inline uint addr_to_region(HeapWord* addr) const;
+
   // Divide the heap region sequence into "chunks" of some size (the number
   // of regions divided by the number of parallel threads times some
   // overpartition factor, currently 4).  Assumes that this will be called
@@ -1399,8 +1477,7 @@
   // As above but starting from region r
   void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk);
 
-  // Returns the first (lowest address) compactible space in the heap.
-  virtual CompactibleSpace* first_compactible_space();
+  HeapRegion* next_compaction_region(const HeapRegion* from) const;
 
   // A CollectedHeap will contain some number of spaces.  This finds the
   // space containing a given address, or else returns NULL.
@@ -1623,10 +1700,6 @@
   // Free up superfluous code root memory.
   void purge_code_root_memory();
 
-  // During an initial mark pause, mark all the code roots that
-  // point into regions *not* in the collection set.
-  void mark_strong_code_roots(uint worker_id);
-
   // Rebuild the stong code root lists for each region
   // after a full GC
   void rebuild_strong_code_roots();
@@ -1635,6 +1708,9 @@
   // in symbol table, possibly in parallel.
   void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true);
 
+  // Parallel phase of unloading/cleaning after G1 concurrent mark.
+  void parallel_cleaning(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, bool class_unloading_occurred);
+
   // Redirty logged cards in the refinement queue.
   void redirty_logged_cards();
   // Verification
@@ -1712,268 +1788,22 @@
 
 public:
   G1ParGCAllocBuffer(size_t gclab_word_size);
+  virtual ~G1ParGCAllocBuffer() {
+    guarantee(_retired, "Allocation buffer has not been retired");
+  }
 
-  void set_buf(HeapWord* buf) {
+  virtual void set_buf(HeapWord* buf) {
     ParGCAllocBuffer::set_buf(buf);
     _retired = false;
   }
 
-  void retire(bool end_of_gc, bool retain) {
-    if (_retired)
+  virtual void retire(bool end_of_gc, bool retain) {
+    if (_retired) {
       return;
+    }
     ParGCAllocBuffer::retire(end_of_gc, retain);
     _retired = true;
   }
 };
 
-class G1ParScanThreadState : public StackObj {
-protected:
-  G1CollectedHeap* _g1h;
-  RefToScanQueue*  _refs;
-  DirtyCardQueue   _dcq;
-  G1SATBCardTableModRefBS* _ct_bs;
-  G1RemSet* _g1_rem;
-
-  G1ParGCAllocBuffer  _surviving_alloc_buffer;
-  G1ParGCAllocBuffer  _tenured_alloc_buffer;
-  G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount];
-  ageTable            _age_table;
-
-  G1ParScanClosure    _scanner;
-
-  size_t           _alloc_buffer_waste;
-  size_t           _undo_waste;
-
-  OopsInHeapRegionClosure*      _evac_failure_cl;
-
-  int  _hash_seed;
-  uint _queue_num;
-
-  size_t _term_attempts;
-
-  double _start;
-  double _start_strong_roots;
-  double _strong_roots_time;
-  double _start_term;
-  double _term_time;
-
-  // Map from young-age-index (0 == not young, 1 is youngest) to
-  // surviving words. base is what we get back from the malloc call
-  size_t* _surviving_young_words_base;
-  // this points into the array, as we use the first few entries for padding
-  size_t* _surviving_young_words;
-
-#define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t))
-
-  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
-
-  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
-
-  DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
-  G1SATBCardTableModRefBS* ctbs()                { return _ct_bs; }
-
-  template <class T> inline void immediate_rs_update(HeapRegion* from, T* p, int tid);
-
-  template <class T> void deferred_rs_update(HeapRegion* from, T* p, int tid) {
-    // If the new value of the field points to the same region or
-    // is the to-space, we don't need to include it in the Rset updates.
-    if (!from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) && !from->is_survivor()) {
-      size_t card_index = ctbs()->index_for(p);
-      // If the card hasn't been added to the buffer, do it.
-      if (ctbs()->mark_card_deferred(card_index)) {
-        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
-      }
-    }
-  }
-
-public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp);
-
-  ~G1ParScanThreadState() {
-    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base, mtGC);
-  }
-
-  RefToScanQueue*   refs()            { return _refs;             }
-  ageTable*         age_table()       { return &_age_table;       }
-
-  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
-    return _alloc_buffers[purpose];
-  }
-
-  size_t alloc_buffer_waste() const              { return _alloc_buffer_waste; }
-  size_t undo_waste() const                      { return _undo_waste; }
-
-#ifdef ASSERT
-  bool verify_ref(narrowOop* ref) const;
-  bool verify_ref(oop* ref) const;
-  bool verify_task(StarTask ref) const;
-#endif // ASSERT
-
-  template <class T> void push_on_queue(T* ref) {
-    assert(verify_ref(ref), "sanity");
-    refs()->push(ref);
-  }
-
-  template <class T> inline void update_rs(HeapRegion* from, T* p, int tid);
-
-  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
-    HeapWord* obj = NULL;
-    size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
-    if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
-      G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
-      add_to_alloc_buffer_waste(alloc_buf->words_remaining());
-      alloc_buf->retire(false /* end_of_gc */, false /* retain */);
-
-      HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size);
-      if (buf == NULL) return NULL; // Let caller handle allocation failure.
-      // Otherwise.
-      alloc_buf->set_word_size(gclab_word_size);
-      alloc_buf->set_buf(buf);
-
-      obj = alloc_buf->allocate(word_sz);
-      assert(obj != NULL, "buffer was definitely big enough...");
-    } else {
-      obj = _g1h->par_allocate_during_gc(purpose, word_sz);
-    }
-    return obj;
-  }
-
-  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) {
-    HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
-    if (obj != NULL) return obj;
-    return allocate_slow(purpose, word_sz);
-  }
-
-  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) {
-    if (alloc_buffer(purpose)->contains(obj)) {
-      assert(alloc_buffer(purpose)->contains(obj + word_sz - 1),
-             "should contain whole object");
-      alloc_buffer(purpose)->undo_allocation(obj, word_sz);
-    } else {
-      CollectedHeap::fill_with_object(obj, word_sz);
-      add_to_undo_waste(word_sz);
-    }
-  }
-
-  void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
-    _evac_failure_cl = evac_failure_cl;
-  }
-  OopsInHeapRegionClosure* evac_failure_closure() {
-    return _evac_failure_cl;
-  }
-
-  int* hash_seed() { return &_hash_seed; }
-  uint queue_num() { return _queue_num; }
-
-  size_t term_attempts() const  { return _term_attempts; }
-  void note_term_attempt() { _term_attempts++; }
-
-  void start_strong_roots() {
-    _start_strong_roots = os::elapsedTime();
-  }
-  void end_strong_roots() {
-    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
-  }
-  double strong_roots_time() const { return _strong_roots_time; }
-
-  void start_term_time() {
-    note_term_attempt();
-    _start_term = os::elapsedTime();
-  }
-  void end_term_time() {
-    _term_time += (os::elapsedTime() - _start_term);
-  }
-  double term_time() const { return _term_time; }
-
-  double elapsed_time() const {
-    return os::elapsedTime() - _start;
-  }
-
-  static void
-    print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
-  void
-    print_termination_stats(int i, outputStream* const st = gclog_or_tty) const;
-
-  size_t* surviving_young_words() {
-    // We add on to hide entry 0 which accumulates surviving words for
-    // age -1 regions (i.e. non-young ones)
-    return _surviving_young_words;
-  }
-
-  void retire_alloc_buffers() {
-    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
-      size_t waste = _alloc_buffers[ap]->words_remaining();
-      add_to_alloc_buffer_waste(waste);
-      _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap),
-                                                 true /* end_of_gc */,
-                                                 false /* retain */);
-    }
-  }
-private:
-  #define G1_PARTIAL_ARRAY_MASK 0x2
-
-  inline bool has_partial_array_mask(oop* ref) const {
-    return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK;
-  }
-
-  // We never encode partial array oops as narrowOop*, so return false immediately.
-  // This allows the compiler to create optimized code when popping references from
-  // the work queue.
-  inline bool has_partial_array_mask(narrowOop* ref) const {
-    assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*");
-    return false;
-  }
-
-  // Only implement set_partial_array_mask() for regular oops, not for narrowOops.
-  // We always encode partial arrays as regular oop, to allow the
-  // specialization for has_partial_array_mask() for narrowOops above.
-  // This means that unintentional use of this method with narrowOops are caught
-  // by the compiler.
-  inline oop* set_partial_array_mask(oop obj) const {
-    assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
-    return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK);
-  }
-
-  inline oop clear_partial_array_mask(oop* ref) const {
-    return cast_to_oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK);
-  }
-
-  inline void do_oop_partial_array(oop* p);
-
-  // This method is applied to the fields of the objects that have just been copied.
-  template <class T> void do_oop_evac(T* p, HeapRegion* from) {
-    assert(!oopDesc::is_null(oopDesc::load_decode_heap_oop(p)),
-           "Reference should not be NULL here as such are never pushed to the task queue.");
-    oop obj = oopDesc::load_decode_heap_oop_not_null(p);
-
-    // Although we never intentionally push references outside of the collection
-    // set, due to (benign) races in the claim mechanism during RSet scanning more
-    // than one thread might claim the same card. So the same card may be
-    // processed multiple times. So redo this check.
-    if (_g1h->in_cset_fast_test(obj)) {
-      oop forwardee;
-      if (obj->is_forwarded()) {
-        forwardee = obj->forwardee();
-      } else {
-        forwardee = copy_to_survivor_space(obj);
-      }
-      assert(forwardee != NULL, "forwardee should not be NULL");
-      oopDesc::encode_store_heap_oop(p, forwardee);
-    }
-
-    assert(obj != NULL, "Must be");
-    update_rs(from, p, queue_num());
-  }
-public:
-
-  oop copy_to_survivor_space(oop const obj);
-
-  template <class T> inline void deal_with_reference(T* ref_to_scan);
-
-  inline void deal_with_reference(StarTask ref);
-
-public:
-  void trim_queue();
-};
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTEDHEAP_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,10 +29,10 @@
 #include "gc_implementation/g1/g1CollectedHeap.hpp"
 #include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
-#include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "utilities/taskqueue.hpp"
 
 // Inline functions for G1CollectedHeap
@@ -40,6 +40,13 @@
 // Return the region with the given index. It assumes the index is valid.
 inline HeapRegion* G1CollectedHeap::region_at(uint index) const { return _hrs.at(index); }
 
+inline uint G1CollectedHeap::addr_to_region(HeapWord* addr) const {
+  assert(is_in_reserved(addr),
+         err_msg("Cannot calculate region index for address "PTR_FORMAT" that is outside of the heap ["PTR_FORMAT", "PTR_FORMAT")",
+                 p2i(addr), p2i(_reserved.start()), p2i(_reserved.end())));
+  return (uint)(pointer_delta(addr, _reserved.start(), sizeof(uint8_t)) >> HeapRegion::LogOfHRGrainBytes);
+}
+
 template <class T>
 inline HeapRegion*
 G1CollectedHeap::heap_region_containing(const T addr) const {
@@ -59,6 +66,19 @@
   return res;
 }
 
+inline void G1CollectedHeap::reset_gc_time_stamp() {
+  _gc_time_stamp = 0;
+  OrderAccess::fence();
+  // Clear the cached CSet starting regions and time stamps.
+  // Their validity is dependent on the GC timestamp.
+  clear_cset_start_regions();
+}
+
+inline void G1CollectedHeap::increment_gc_time_stamp() {
+  ++_gc_time_stamp;
+  OrderAccess::fence();
+}
+
 inline void G1CollectedHeap::old_set_remove(HeapRegion* hr) {
   _old_set.remove(hr);
 }
@@ -159,17 +179,11 @@
   return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
 }
 
-
 // This is a fast test on whether a reference points into the
 // collection set or not. Assume that the reference
 // points into the heap.
-inline bool G1CollectedHeap::in_cset_fast_test(oop obj) {
-  assert(_in_cset_fast_test != NULL, "sanity");
-  assert(_g1_committed.contains((HeapWord*) obj), err_msg("Given reference outside of heap, is "PTR_FORMAT, p2i((HeapWord*)obj)));
-  // no need to subtract the bottom of the heap from obj,
-  // _in_cset_fast_test is biased
-  uintx index = cast_from_oop<uintx>(obj) >> HeapRegion::LogOfHRGrainBytes;
-  bool ret = _in_cset_fast_test[index];
+inline bool G1CollectedHeap::is_in_cset(oop obj) {
+  bool ret = _in_cset_fast_test.is_in_cset((HeapWord*)obj);
   // let's make sure the result is consistent with what the slower
   // test returns
   assert( ret || !obj_in_cs(obj), "sanity");
@@ -177,6 +191,18 @@
   return ret;
 }
 
+bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) {
+  return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj);
+}
+
+G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) {
+  return _in_cset_fast_test.at((HeapWord*)obj);
+}
+
+void G1CollectedHeap::register_humongous_region_with_in_cset_fast_test(uint index) {
+  _in_cset_fast_test.set_humongous(index);
+}
+
 #ifndef PRODUCT
 // Support for G1EvacuationFailureALot
 
@@ -282,88 +308,21 @@
   else return is_obj_ill(obj, hr);
 }
 
-template <class T> inline void G1ParScanThreadState::immediate_rs_update(HeapRegion* from, T* p, int tid) {
-  if (!from->is_survivor()) {
-    _g1_rem->par_write_ref(from, p, tid);
-  }
-}
-
-template <class T> void G1ParScanThreadState::update_rs(HeapRegion* from, T* p, int tid) {
-  if (G1DeferredRSUpdate) {
-    deferred_rs_update(from, p, tid);
-  } else {
-    immediate_rs_update(from, p, tid);
-  }
-}
-
-
-inline void G1ParScanThreadState::do_oop_partial_array(oop* p) {
-  assert(has_partial_array_mask(p), "invariant");
-  oop from_obj = clear_partial_array_mask(p);
-
-  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
-  assert(from_obj->is_objArray(), "must be obj array");
-  objArrayOop from_obj_array = objArrayOop(from_obj);
-  // The from-space object contains the real length.
-  int length                 = from_obj_array->length();
-
-  assert(from_obj->is_forwarded(), "must be forwarded");
-  oop to_obj                 = from_obj->forwardee();
-  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
-  objArrayOop to_obj_array   = objArrayOop(to_obj);
-  // We keep track of the next start index in the length field of the
-  // to-space object.
-  int next_index             = to_obj_array->length();
-  assert(0 <= next_index && next_index < length,
-         err_msg("invariant, next index: %d, length: %d", next_index, length));
-
-  int start                  = next_index;
-  int end                    = length;
-  int remainder              = end - start;
-  // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
-  if (remainder > 2 * ParGCArrayScanChunk) {
-    end = start + ParGCArrayScanChunk;
-    to_obj_array->set_length(end);
-    // Push the remainder before we process the range in case another
-    // worker has run out of things to do and can steal it.
-    oop* from_obj_p = set_partial_array_mask(from_obj);
-    push_on_queue(from_obj_p);
-  } else {
-    assert(length == end, "sanity");
-    // We'll process the final range for this object. Restore the length
-    // so that the heap remains parsable in case of evacuation failure.
-    to_obj_array->set_length(end);
-  }
-  _scanner.set_region(_g1h->heap_region_containing_raw(to_obj));
-  // Process indexes [start,end). It will also process the header
-  // along with the first chunk (i.e., the chunk with start == 0).
-  // Note that at this point the length field of to_obj_array is not
-  // correct given that we are using it to keep track of the next
-  // start index. oop_iterate_range() (thankfully!) ignores the length
-  // field and only relies on the start / end parameters.  It does
-  // however return the size of the object which will be incorrect. So
-  // we have to ignore it even if we wanted to use it.
-  to_obj_array->oop_iterate_range(&_scanner, start, end);
-}
-
-template <class T> inline void G1ParScanThreadState::deal_with_reference(T* ref_to_scan) {
-  if (!has_partial_array_mask(ref_to_scan)) {
-    // Note: we can use "raw" versions of "region_containing" because
-    // "obj_to_scan" is definitely in the heap, and is not in a
-    // humongous region.
-    HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-    do_oop_evac(ref_to_scan, r);
-  } else {
-    do_oop_partial_array((oop*)ref_to_scan);
-  }
-}
-
-inline void G1ParScanThreadState::deal_with_reference(StarTask ref) {
-  assert(verify_task(ref), "sanity");
-  if (ref.is_narrow()) {
-    deal_with_reference((narrowOop*)ref);
-  } else {
-    deal_with_reference((oop*)ref);
+inline void G1CollectedHeap::set_humongous_is_live(oop obj) {
+  uint region = addr_to_region((HeapWord*)obj);
+  // We not only set the "live" flag in the _humongous_is_live table, but also
+  // reset the entry in the _in_cset_fast_test table so that subsequent references
+  // to the same humongous object do not go into the slow path again.
+  // This is racy, as multiple threads may enter here at the same time, but this
+  // is benign.
+  // During collection we only ever set the "live" flag, and only ever clear the
+  // entry in the _in_cset_fast_test table.
+  // We only ever evaluate the contents of these tables (in the VM thread) after
+  // having synchronized the worker threads with the VM thread, or in the same
+  // thread (i.e. within the VM thread).
+  if (!_humongous_is_live.is_live(region)) {
+    _humongous_is_live.set_live(region);
+    _in_cset_fast_test.clear_humongous(region);
   }
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1046,7 +1046,7 @@
 
   bool new_in_marking_window = _in_marking_window;
   bool new_in_marking_window_im = false;
-  if (during_initial_mark_pause()) {
+  if (last_pause_included_initial_mark) {
     new_in_marking_window = true;
     new_in_marking_window_im = true;
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1EvacFailure.hpp
--- a/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -71,6 +71,9 @@
   bool _during_initial_mark;
   bool _during_conc_mark;
   uint _worker_id;
+  HeapWord* _end_of_last_gap;
+  HeapWord* _last_gap_threshold;
+  HeapWord* _last_obj_threshold;
 
 public:
   RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
@@ -83,7 +86,10 @@
     _update_rset_cl(update_rset_cl),
     _during_initial_mark(during_initial_mark),
     _during_conc_mark(during_conc_mark),
-    _worker_id(worker_id) { }
+    _worker_id(worker_id),
+    _end_of_last_gap(hr->bottom()),
+    _last_gap_threshold(hr->bottom()),
+    _last_obj_threshold(hr->bottom()) { }
 
   size_t marked_bytes() { return _marked_bytes; }
 
@@ -107,7 +113,12 @@
     HeapWord* obj_addr = (HeapWord*) obj;
     assert(_hr->is_in(obj_addr), "sanity");
     size_t obj_size = obj->size();
-    _hr->update_bot_for_object(obj_addr, obj_size);
+    HeapWord* obj_end = obj_addr + obj_size;
+
+    if (_end_of_last_gap != obj_addr) {
+      // there was a gap before obj_addr
+      _last_gap_threshold = _hr->cross_threshold(_end_of_last_gap, obj_addr);
+    }
 
     if (obj->is_forwarded() && obj->forwardee() == obj) {
       // The object failed to move.
@@ -115,7 +126,9 @@
       // We consider all objects that we find self-forwarded to be
       // live. What we'll do is that we'll update the prev marking
       // info so that they are all under PTAMS and explicitly marked.
-      _cm->markPrev(obj);
+      if (!_cm->isPrevMarked(obj)) {
+        _cm->markPrev(obj);
+      }
       if (_during_initial_mark) {
         // For the next marking info we'll only mark the
         // self-forwarded objects explicitly if we are during
@@ -145,13 +158,18 @@
       // remembered set entries missing given that we skipped cards on
       // the collection set. So, we'll recreate such entries now.
       obj->oop_iterate(_update_rset_cl);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
     } else {
+
       // The object has been either evacuated or is dead. Fill it with a
       // dummy object.
-      MemRegion mr((HeapWord*) obj, obj_size);
+      MemRegion mr(obj_addr, obj_size);
       CollectedHeap::fill_with_object(mr);
+
+      // must nuke all dead objects which we skipped when iterating over the region
+      _cm->clearRangePrevBitmap(MemRegion(_end_of_last_gap, obj_end));
     }
+    _end_of_last_gap = obj_end;
+    _last_obj_threshold = _hr->cross_threshold(obj_addr, obj_end);
   }
 };
 
@@ -182,15 +200,9 @@
                                             during_conc_mark,
                                             _worker_id);
 
-        MemRegion mr(hr->bottom(), hr->end());
-        // We'll recreate the prev marking info so we'll first clear
-        // the prev bitmap range for this region. We never mark any
-        // CSet objects explicitly so the next bitmap range should be
-        // cleared anyway.
-        _cm->clearRangePrevBitmap(mr);
-
         hr->note_self_forwarding_removal_start(during_initial_mark,
                                                during_conc_mark);
+        _g1h->check_bitmaps("Self-Forwarding Ptr Removal", hr);
 
         // In the common case (i.e. when there is no evacuation
         // failure) we make sure that the following is done when
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -166,13 +166,14 @@
   _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
   _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
   _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_strong_code_root_mark_times_ms(_max_gc_threads, "%.1lf"),
   _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
   _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
   _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
   _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
+  _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"),
+  _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT),
   _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
   _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
 {
@@ -191,13 +192,16 @@
   _last_update_rs_processed_buffers.reset();
   _last_scan_rs_times_ms.reset();
   _last_strong_code_root_scan_times_ms.reset();
-  _last_strong_code_root_mark_times_ms.reset();
   _last_obj_copy_times_ms.reset();
   _last_termination_times_ms.reset();
   _last_termination_attempts.reset();
   _last_gc_worker_end_times_ms.reset();
   _last_gc_worker_times_ms.reset();
   _last_gc_worker_other_times_ms.reset();
+
+  _last_redirty_logged_cards_time_ms.reset();
+  _last_redirty_logged_cards_processed_cards.reset();
+
 }
 
 void G1GCPhaseTimes::note_gc_end() {
@@ -208,7 +212,6 @@
   _last_update_rs_processed_buffers.verify();
   _last_scan_rs_times_ms.verify();
   _last_strong_code_root_scan_times_ms.verify();
-  _last_strong_code_root_mark_times_ms.verify();
   _last_obj_copy_times_ms.verify();
   _last_termination_times_ms.verify();
   _last_termination_attempts.verify();
@@ -223,7 +226,6 @@
                                _last_update_rs_times_ms.get(i) +
                                _last_scan_rs_times_ms.get(i) +
                                _last_strong_code_root_scan_times_ms.get(i) +
-                               _last_strong_code_root_mark_times_ms.get(i) +
                                _last_obj_copy_times_ms.get(i) +
                                _last_termination_times_ms.get(i);
 
@@ -233,6 +235,11 @@
 
   _last_gc_worker_times_ms.verify();
   _last_gc_worker_other_times_ms.verify();
+
+  if (G1DeferredRSUpdate) {
+    _last_redirty_logged_cards_time_ms.verify();
+    _last_redirty_logged_cards_processed_cards.verify();
+  }
 }
 
 void G1GCPhaseTimes::note_string_dedup_fixup_start() {
@@ -249,6 +256,10 @@
   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
 }
 
+void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) {
+  LineBuffer(level).append_and_print_cr("[%s: "SIZE_FORMAT"]", str, value);
+}
+
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers);
 }
@@ -292,9 +303,6 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
     }
-    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
-     _last_strong_code_root_mark_times_ms.print(2, "Code Root Marking (ms)");
-    }
     _last_update_rs_times_ms.print(2, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(3, "Processed Buffers");
     _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
@@ -312,9 +320,6 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
     }
-    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
-      _last_strong_code_root_mark_times_ms.print(1, "Code Root Marking (ms)");
-    }
     _last_update_rs_times_ms.print(1, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(2, "Processed Buffers");
     _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
@@ -352,6 +357,18 @@
   print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
   if (G1DeferredRSUpdate) {
     print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
+    if (G1Log::finest()) {
+      _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty");
+      _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
+    }
+  }
+  if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
+    if (G1Log::finest()) {
+      print_stats(3, "Humongous Total", _cur_fast_reclaim_humongous_total);
+      print_stats(3, "Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
+      print_stats(3, "Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
+    }
   }
   print_stats(2, "Free CSet",
     (_recorded_young_free_cset_time_ms +
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -120,7 +120,6 @@
   WorkerDataArray<int>    _last_update_rs_processed_buffers;
   WorkerDataArray<double> _last_scan_rs_times_ms;
   WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
-  WorkerDataArray<double> _last_strong_code_root_mark_times_ms;
   WorkerDataArray<double> _last_obj_copy_times_ms;
   WorkerDataArray<double> _last_termination_times_ms;
   WorkerDataArray<size_t> _last_termination_attempts;
@@ -151,16 +150,24 @@
   double _recorded_young_cset_choice_time_ms;
   double _recorded_non_young_cset_choice_time_ms;
 
+  WorkerDataArray<double> _last_redirty_logged_cards_time_ms;
+  WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards;
   double _recorded_redirty_logged_cards_time_ms;
 
   double _recorded_young_free_cset_time_ms;
   double _recorded_non_young_free_cset_time_ms;
 
+  double _cur_fast_reclaim_humongous_time_ms;
+  size_t _cur_fast_reclaim_humongous_total;
+  size_t _cur_fast_reclaim_humongous_candidates;
+  size_t _cur_fast_reclaim_humongous_reclaimed;
+
   double _cur_verify_before_time_ms;
   double _cur_verify_after_time_ms;
 
   // Helper methods for detailed logging
   void print_stats(int level, const char* str, double value);
+  void print_stats(int level, const char* str, size_t value);
   void print_stats(int level, const char* str, double value, uint workers);
 
  public:
@@ -197,10 +204,6 @@
     _last_strong_code_root_scan_times_ms.set(worker_i, ms);
   }
 
-  void record_strong_code_root_mark_time(uint worker_i, double ms) {
-    _last_strong_code_root_mark_times_ms.set(worker_i, ms);
-  }
-
   void record_obj_copy_time(uint worker_i, double ms) {
     _last_obj_copy_times_ms.set(worker_i, ms);
   }
@@ -285,6 +288,16 @@
     _recorded_non_young_free_cset_time_ms = time_ms;
   }
 
+  void record_fast_reclaim_humongous_stats(size_t total, size_t candidates) {
+    _cur_fast_reclaim_humongous_total = total;
+    _cur_fast_reclaim_humongous_candidates = candidates;
+  }
+
+  void record_fast_reclaim_humongous_time_ms(double value, size_t reclaimed) {
+    _cur_fast_reclaim_humongous_time_ms = value;
+    _cur_fast_reclaim_humongous_reclaimed = reclaimed;
+  }
+
   void record_young_cset_choice_time_ms(double time_ms) {
     _recorded_young_cset_choice_time_ms = time_ms;
   }
@@ -293,6 +306,14 @@
     _recorded_non_young_cset_choice_time_ms = time_ms;
   }
 
+  void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) {
+    _last_redirty_logged_cards_time_ms.set(worker_i, time_ms);
+  }
+
+  void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) {
+    _last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers);
+  }
+
   void record_redirty_logged_cards_time_ms(double time_ms) {
     _recorded_redirty_logged_cards_time_ms = time_ms;
   }
@@ -343,6 +364,10 @@
     return _recorded_non_young_free_cset_time_ms;
   }
 
+  double fast_reclaim_humongous_time_ms() {
+    return _cur_fast_reclaim_humongous_time_ms;
+  }
+
   double average_last_update_rs_time() {
     return _last_update_rs_times_ms.average();
   }
@@ -359,10 +384,6 @@
     return _last_strong_code_root_scan_times_ms.average();
   }
 
-  double average_last_strong_code_root_mark_time(){
-    return _last_strong_code_root_mark_times_ms.average();
-  }
-
   double average_last_obj_copy_time() {
     return _last_obj_copy_times_ms.average();
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1MarkSweep.cpp
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -123,20 +123,20 @@
 void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
                                     bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  GCTraceTime tm("phase 1", G1Log::fine() && Verbose, true, gc_timer());
+  GCTraceTime tm("phase 1", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace(" 1");
 
   SharedHeap* sh = SharedHeap::heap();
 
-  // Need cleared claim bits for the strong roots processing
+  // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  sh->process_strong_roots(true,  // activate StrongRootsScope
-                           false, // not scavenging.
-                           SharedHeap::SO_SystemClasses,
+  MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
+  sh->process_strong_roots(true,   // activate StrongRootsScope
+                           SharedHeap::SO_None,
                            &GenMarkSweep::follow_root_closure,
-                           &GenMarkSweep::follow_code_root_closure,
-                           &GenMarkSweep::follow_klass_closure);
+                           &GenMarkSweep::follow_cld_closure,
+                           &follow_code_closure);
 
   // Process reference objects found during marking
   ReferenceProcessor* rp = GenMarkSweep::ref_processor();
@@ -148,7 +148,8 @@
                                       &GenMarkSweep::keep_alive,
                                       &GenMarkSweep::follow_stack_closure,
                                       NULL,
-                                      gc_timer());
+                                      gc_timer(),
+                                      gc_tracer()->gc_id());
   gc_tracer()->report_gc_reference_stats(stats);
 
 
@@ -199,6 +200,23 @@
   CompactPoint _cp;
   HeapRegionSetCount _humongous_regions_removed;
 
+  bool is_cp_initialized() const {
+    return _cp.space != NULL;
+  }
+
+  void prepare_for_compaction(HeapRegion* hr, HeapWord* end) {
+    // If this is the first live region that we came across which we can compact,
+    // initialize the CompactPoint.
+    if (!is_cp_initialized()) {
+      _cp.space = hr;
+      _cp.threshold = hr->initialize_threshold();
+    }
+    hr->prepare_for_compaction(&_cp);
+    // Also clear the part of the card table that will be unused after
+    // compaction.
+    _mrbs->clear(MemRegion(hr->compaction_top(), end));
+  }
+
   void free_humongous_region(HeapRegion* hr) {
     HeapWord* end = hr->end();
     FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep");
@@ -210,18 +228,15 @@
     _humongous_regions_removed.increment(1u, hr->capacity());
 
     _g1h->free_humongous_region(hr, &dummy_free_list, false /* par */);
-    hr->prepare_for_compaction(&_cp);
-    // Also clear the part of the card table that will be unused after
-    // compaction.
-    _mrbs->clear(MemRegion(hr->compaction_top(), end));
+    prepare_for_compaction(hr, end);
     dummy_free_list.remove_all();
   }
 
 public:
-  G1PrepareCompactClosure(CompactibleSpace* cs)
+  G1PrepareCompactClosure()
   : _g1h(G1CollectedHeap::heap()),
     _mrbs(_g1h->g1_barrier_set()),
-    _cp(NULL, cs, cs->initialize_threshold()),
+    _cp(NULL),
     _humongous_regions_removed() { }
 
   void update_sets() {
@@ -244,10 +259,7 @@
         assert(hr->continuesHumongous(), "Invalid humongous.");
       }
     } else {
-      hr->prepare_for_compaction(&_cp);
-      // Also clear the part of the card table that will be unused after
-      // compaction.
-      _mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
+      prepare_for_compaction(hr, hr->end());
     }
     return false;
   }
@@ -262,17 +274,10 @@
 
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
-  GCTraceTime tm("phase 2", G1Log::fine() && Verbose, true, gc_timer());
+  GCTraceTime tm("phase 2", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace("2");
 
-  // find the first region
-  HeapRegion* r = g1h->region_at(0);
-  CompactibleSpace* sp = r;
-  if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
-    sp = r->next_compaction_space();
-  }
-
-  G1PrepareCompactClosure blk(sp);
+  G1PrepareCompactClosure blk;
   g1h->heap_region_iterate(&blk);
   blk.update_sets();
 }
@@ -299,27 +304,27 @@
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
   // Adjust the pointers to reflect the new locations
-  GCTraceTime tm("phase 3", G1Log::fine() && Verbose, true, gc_timer());
+  GCTraceTime tm("phase 3", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace("3");
 
   SharedHeap* sh = SharedHeap::heap();
 
-  // Need cleared claim bits for the strong roots processing
+  // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  sh->process_strong_roots(true,  // activate StrongRootsScope
-                           false, // not scavenging.
-                           SharedHeap::SO_AllClasses,
-                           &GenMarkSweep::adjust_pointer_closure,
-                           NULL,  // do not touch code cache here
-                           &GenMarkSweep::adjust_klass_closure);
+  CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
+  sh->process_all_roots(true,  // activate StrongRootsScope
+                        SharedHeap::SO_AllCodeCache,
+                        &GenMarkSweep::adjust_pointer_closure,
+                        &GenMarkSweep::adjust_cld_closure,
+                        &adjust_code_closure);
 
   assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity");
   g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure);
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
-  g1h->g1_process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
+  sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
 
   if (G1StringDedup::is_enabled()) {
     G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
@@ -362,7 +367,7 @@
   // to use a higher index (saved from phase2) when verifying perm_gen.
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
-  GCTraceTime tm("phase 4", G1Log::fine() && Verbose, true, gc_timer());
+  GCTraceTime tm("phase 4", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace("4");
 
   G1SpaceCompactClosure blk;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1OopClosures.cpp
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,7 +25,28 @@
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
+#include "gc_implementation/g1/g1ParScanThreadState.hpp"
 
 G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1,  G1ParScanThreadState* par_scan_state) :
   G1ParClosureSuper(g1, par_scan_state), _scanned_klass(NULL),
   _cm(_g1->concurrent_mark()) {}
+
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1) :
+  _g1(g1), _par_scan_state(NULL), _worker_id(UINT_MAX) { }
+
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+  _g1(g1), _par_scan_state(NULL),
+  _worker_id(UINT_MAX) {
+  set_par_scan_thread_state(par_scan_state);
+}
+
+void G1ParClosureSuper::set_par_scan_thread_state(G1ParScanThreadState* par_scan_state) {
+  assert(_par_scan_state == NULL, "_par_scan_state must only be set once");
+  assert(par_scan_state != NULL, "Must set par_scan_state to non-NULL.");
+
+  _par_scan_state = par_scan_state;
+  _worker_id = par_scan_state->queue_num();
+
+  assert(_worker_id < MAX2((uint)ParallelGCThreads, 1u),
+         err_msg("The given worker id %u must be less than the number of threads %u", _worker_id, MAX2((uint)ParallelGCThreads, 1u)));
+}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1OopClosures.hpp
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
 
+#include "memory/iterator.hpp"
+
 class HeapRegion;
 class G1CollectedHeap;
 class G1RemSet;
@@ -51,8 +53,13 @@
   G1ParScanThreadState* _par_scan_state;
   uint _worker_id;
 public:
+  // Initializes the instance with _par_scan_state left NULL; the thread state
+  // must be supplied later, exactly once, via set_par_scan_thread_state().
+  G1ParClosureSuper(G1CollectedHeap* g1);
   G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
   bool apply_to_weak_ref_discovered_field() { return true; }
+
+  void set_par_scan_thread_state(G1ParScanThreadState* par_scan_state);
 };
 
 class G1ParPushHeapRSClosure : public G1ParClosureSuper {
@@ -68,9 +75,8 @@
 
 class G1ParScanClosure : public G1ParClosureSuper {
 public:
-  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, ReferenceProcessor* rp) :
-    G1ParClosureSuper(g1, par_scan_state)
-  {
+  G1ParScanClosure(G1CollectedHeap* g1, ReferenceProcessor* rp) :
+    G1ParClosureSuper(g1) {
     assert(_ref_processor == NULL, "sanity");
     _ref_processor = rp;
   }
@@ -102,7 +108,7 @@
   template <class T> void do_klass_barrier(T* p, oop new_obj);
 };
 
-template <G1Barrier barrier, bool do_mark_object>
+template <G1Barrier barrier, G1Mark do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
 private:
   template <class T> void do_oop_work(T* p);
@@ -117,19 +123,19 @@
   template <class T> void do_oop_nv(T* p) { do_oop_work(p); }
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+
+  G1CollectedHeap*      g1()  { return _g1; }
+  G1ParScanThreadState* pss() { return _par_scan_state; }
+  ReferenceProcessor*   rp()  { return _ref_processor; }
 };
 
-typedef G1ParCopyClosure<G1BarrierNone, false> G1ParScanExtRootClosure;
-typedef G1ParCopyClosure<G1BarrierKlass, false> G1ParScanMetadataClosure;
-
-
-typedef G1ParCopyClosure<G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
-typedef G1ParCopyClosure<G1BarrierKlass, true> G1ParScanAndMarkMetadataClosure;
-
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkNone>             G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkFromRoot>         G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkPromotedFromRoot> G1ParScanAndMarkWeakExtRootClosure;
 // We use a separate closure to handle references during evacuation
 // failure processing.
 
-typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacFailureClosure;
+typedef G1ParCopyClosure<G1BarrierEvac, G1MarkNone> G1ParScanHeapEvacFailureClosure;
 
 class FilterIntoCSClosure: public ExtendedOopClosure {
   G1CollectedHeap* _g1;
@@ -160,10 +166,11 @@
 };
 
 // Closure for iterating over object fields during concurrent marking
-class G1CMOopClosure : public ExtendedOopClosure {
+class G1CMOopClosure : public MetadataAwareOopClosure {
+protected:
+  ConcurrentMark*    _cm;
 private:
   G1CollectedHeap*   _g1h;
-  ConcurrentMark*    _cm;
   CMTask*            _task;
 public:
   G1CMOopClosure(G1CollectedHeap* g1h, ConcurrentMark* cm, CMTask* task);
@@ -173,7 +180,7 @@
 };
 
 // Closure to scan the root regions during concurrent marking
-class G1RootRegionScanClosure : public ExtendedOopClosure {
+class G1RootRegionScanClosure : public MetadataAwareOopClosure {
 private:
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,9 +28,12 @@
 #include "gc_implementation/g1/concurrentMark.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.hpp"
 #include "gc_implementation/g1/g1OopClosures.hpp"
+#include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "memory/iterator.inline.hpp"
+#include "runtime/prefetch.inline.hpp"
 
 /*
  * This really ought to be an inline function, but apparently the C++
@@ -41,7 +44,7 @@
 inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
-      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
+      _g1->is_in_cset_or_humongous(oopDesc::decode_heap_oop_not_null(heap_oop))) {
     _oc->do_oop(p);
   }
 }
@@ -64,7 +67,8 @@
 
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+    if (state == G1CollectedHeap::InCSet) {
       // We're not going to even bother checking whether the object is
       // already forwarded or not, as this usually causes an immediate
       // stall. We'll try to prefetch the object (for write, given that
@@ -83,6 +87,9 @@
 
       _par_scan_state->push_on_queue(p);
     } else {
+      if (state == G1CollectedHeap::IsHumongous) {
+        _g1->set_humongous_is_live(obj);
+      }
       _par_scan_state->update_rs(_from, p, _worker_id);
     }
   }
@@ -94,22 +101,20 @@
 
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    if (_g1->is_in_cset_or_humongous(obj)) {
       Prefetch::write(obj->mark_addr(), 0);
       Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
 
       // Place on the references queue
       _par_scan_state->push_on_queue(p);
+    } else {
+      assert(!_g1->obj_in_cs(obj), "checking");
     }
   }
 }
 
 template <class T>
 inline void G1CMOopClosure::do_oop_nv(T* p) {
-  assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
-  assert(!_g1h->is_on_master_free_list(
-                    _g1h->heap_region_containing((HeapWord*) p)), "invariant");
-
   oop obj = oopDesc::load_decode_heap_oop(p);
   if (_cm->verbose_high()) {
     gclog_or_tty->print_cr("[%u] we're looking at location "
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1OopClosures.inline.hpp"
+#include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
+#include "oops/oop.inline.hpp"
+#include "oops/oop.pcgc.inline.hpp"
+#include "runtime/prefetch.inline.hpp"
+
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp)
+  : _g1h(g1h),
+    _refs(g1h->task_queue(queue_num)),
+    _dcq(&g1h->dirty_card_queue_set()),
+    _ct_bs(g1h->g1_barrier_set()),
+    _g1_rem(g1h->g1_rem_set()),
+    _hash_seed(17), _queue_num(queue_num),
+    _term_attempts(0),
+    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
+    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
+    _age_table(false), _scanner(g1h, rp),
+    _strong_roots_time(0), _term_time(0),
+    _alloc_buffer_waste(0), _undo_waste(0) {
+  _scanner.set_par_scan_thread_state(this);  // _scanner is constructed above without a thread state
+  // We allocate one entry per young collection set region plus one, since
+  // we "sacrifice" entry 0 to keep track of surviving words for
+  // non-young regions (whose young_index_in_cset() is -1).
+  // We also add a few elements at the beginning and at the end in
+  // an attempt to eliminate cache contention
+  uint real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
+  uint array_length = PADDING_ELEM_NUM +
+                      real_length +
+                      PADDING_ELEM_NUM;
+  _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
+  if (_surviving_young_words_base == NULL)
+    vm_exit_out_of_memory(array_length * sizeof(size_t), OOM_MALLOC_ERROR,
+                          "Not enough space for young surv histo.");
+  _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
+  memset(_surviving_young_words, 0, (size_t) real_length * sizeof(size_t));
+
+  _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer;
+  _alloc_buffers[GCAllocForTenured]  = &_tenured_alloc_buffer;
+
+  _start = os::elapsedTime();
+}
+
+G1ParScanThreadState::~G1ParScanThreadState() {
+  retire_alloc_buffers();
+  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base, mtGC);
+}
+
+void
+G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st)
+{
+  st->print_raw_cr("GC Termination Stats");
+  st->print_raw_cr("     elapsed  --strong roots-- -------termination-------"
+                   " ------waste (KiB)------");
+  st->print_raw_cr("thr     ms        ms      %        ms      %    attempts"
+                   "  total   alloc    undo");
+  st->print_raw_cr("--- --------- --------- ------ --------- ------ --------"
+                   " ------- ------- -------");
+}
+
+void
+G1ParScanThreadState::print_termination_stats(int i,
+                                              outputStream* const st) const
+{
+  const double elapsed_ms = elapsed_time() * 1000.0;
+  const double s_roots_ms = strong_roots_time() * 1000.0;
+  const double term_ms    = term_time() * 1000.0;
+  st->print_cr("%3d %9.2f %9.2f %6.2f "
+               "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
+               SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
+               i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
+               term_ms, term_ms * 100 / elapsed_ms, term_attempts(),
+               (alloc_buffer_waste() + undo_waste()) * HeapWordSize / K,
+               alloc_buffer_waste() * HeapWordSize / K,
+               undo_waste() * HeapWordSize / K);
+}
+
+#ifdef ASSERT
+bool G1ParScanThreadState::verify_ref(narrowOop* ref) const {
+  assert(ref != NULL, "invariant");
+  assert(UseCompressedOops, "sanity");
+  assert(!has_partial_array_mask(ref), err_msg("ref=" PTR_FORMAT, p2i(ref)));
+  oop p = oopDesc::load_decode_heap_oop(ref);
+  assert(_g1h->is_in_g1_reserved(p),
+         err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, p2i(ref), p2i(p)));
+  return true;
+}
+
+bool G1ParScanThreadState::verify_ref(oop* ref) const {
+  assert(ref != NULL, "invariant");
+  if (has_partial_array_mask(ref)) {
+    // Must be in the collection set--it's already been copied.
+    oop p = clear_partial_array_mask(ref);
+    assert(_g1h->obj_in_cs(p),
+           err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, p2i(ref), p2i(p)));
+  } else {
+    oop p = oopDesc::load_decode_heap_oop(ref);
+    assert(_g1h->is_in_g1_reserved(p),
+           err_msg("ref=" PTR_FORMAT " p=" PTR_FORMAT, p2i(ref), p2i(p)));
+  }
+  return true;
+}
+
+bool G1ParScanThreadState::verify_task(StarTask ref) const {
+  if (ref.is_narrow()) {
+    return verify_ref((narrowOop*) ref);
+  } else {
+    return verify_ref((oop*) ref);
+  }
+}
+#endif // ASSERT
+
+void G1ParScanThreadState::trim_queue() {
+  assert(_evac_failure_cl != NULL, "not set");
+
+  StarTask ref;
+  do {
+    // Drain the overflow stack first, so other threads can steal.
+    while (_refs->pop_overflow(ref)) {
+      dispatch_reference(ref);
+    }
+
+    while (_refs->pop_local(ref)) {
+      dispatch_reference(ref);
+    }
+  } while (!_refs->is_empty());
+}
+
+oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
+  size_t word_sz = old->size();
+  HeapRegion* from_region = _g1h->heap_region_containing_raw(old);
+  // +1 to make the -1 indexes valid...
+  int       young_index = from_region->young_index_in_cset()+1;
+  assert( (from_region->is_young() && young_index >  0) ||
+         (!from_region->is_young() && young_index == 0), "invariant" );
+  G1CollectorPolicy* g1p = _g1h->g1_policy();
+  markOop m = old->mark();
+  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
+                                           : m->age();
+  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
+                                                             word_sz);
+  HeapWord* obj_ptr = allocate(alloc_purpose, word_sz);
+#ifndef PRODUCT
+  // Should this evacuation fail?
+  if (_g1h->evacuation_should_fail()) {
+    if (obj_ptr != NULL) {
+      undo_allocation(alloc_purpose, obj_ptr, word_sz);
+      obj_ptr = NULL;
+    }
+  }
+#endif // !PRODUCT
+
+  if (obj_ptr == NULL) {
+    // This will either forward-to-self, or detect that someone else has
+    // installed a forwarding pointer.
+    return _g1h->handle_evacuation_failure_par(this, old);
+  }
+
+  oop obj = oop(obj_ptr);
+
+  // We're going to allocate linearly, so might as well prefetch ahead.
+  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
+
+  oop forward_ptr = old->forward_to_atomic(obj);
+  if (forward_ptr == NULL) {
+    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
+
+    // alloc_purpose is just a hint to allocate() above, recheck the type of region
+    // we actually allocated from and update alloc_purpose accordingly
+    HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr);
+    alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured;
+
+    if (g1p->track_object_age(alloc_purpose)) {
+      // We could simply do obj->incr_age(). However, this causes a
+      // performance issue. obj->incr_age() will first check whether
+      // the object has a displaced mark by checking its mark word;
+      // getting the mark word from the new location of the object
+      // stalls. So, given that we already have the mark word and we
+      // are about to install it anyway, it's better to increase the
+      // age on the mark word, when the object does not have a
+      // displaced mark word. We're not expecting many objects to have
+      // a displaced mark word, so that case is not optimized
+      // further (it could be...) and we simply call obj->incr_age().
+
+      if (m->has_displaced_mark_helper()) {
+        // in this case, we have to install the mark word first,
+        // otherwise obj looks to be forwarded (the old mark word,
+        // which contains the forward pointer, was copied)
+        obj->set_mark(m);
+        obj->incr_age();
+      } else {
+        m = m->incr_age();
+        obj->set_mark(m);
+      }
+      age_table()->add(obj, word_sz);
+    } else {
+      obj->set_mark(m);
+    }
+
+    if (G1StringDedup::is_enabled()) {
+      G1StringDedup::enqueue_from_evacuation(from_region->is_young(),
+                                             to_region->is_young(),
+                                             queue_num(),
+                                             obj);
+    }
+
+    size_t* surv_young_words = surviving_young_words();
+    surv_young_words[young_index] += word_sz;
+
+    if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
+      // We keep track of the next start index in the length field of
+      // the to-space object. The actual length can be found in the
+      // length field of the from-space object.
+      arrayOop(obj)->set_length(0);
+      oop* old_p = set_partial_array_mask(old);
+      push_on_queue(old_p);
+    } else {
+      // No point in using the slower heap_region_containing() method,
+      // given that we know obj is in the heap.
+      _scanner.set_region(_g1h->heap_region_containing_raw(obj));
+      obj->oop_iterate_backwards(&_scanner);
+    }
+  } else {
+    undo_allocation(alloc_purpose, obj_ptr, word_sz);
+    obj = forward_ptr;
+  }
+  return obj;
+}
+
+HeapWord* G1ParScanThreadState::allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
+  HeapWord* obj = NULL;
+  size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
+  if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {  // below ParallelGCBufferWastePct% of a buffer: refill
+    G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
+    add_to_alloc_buffer_waste(alloc_buf->words_remaining());  // the old buffer's unused words count as waste
+    alloc_buf->retire(false /* end_of_gc */, false /* retain */);
+
+    HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size);
+    if (buf == NULL) {
+      return NULL; // Let caller handle allocation failure.
+    }
+    // Otherwise install the newly allocated memory as this purpose's buffer.
+    alloc_buf->set_word_size(gclab_word_size);
+    alloc_buf->set_buf(buf);
+
+    obj = alloc_buf->allocate(word_sz);
+    assert(obj != NULL, "buffer was definitely big enough...");
+  } else {  // too large relative to a buffer: allocate the object directly
+    obj = _g1h->par_allocate_during_gc(purpose, word_sz);
+  }
+  return obj;
+}
+
+void G1ParScanThreadState::undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) {
+  G1ParGCAllocBuffer* const buf = alloc_buffer(purpose);
+  if (!buf->contains(obj)) {  // not from the buffer: overwrite with a filler object and count as waste
+    CollectedHeap::fill_with_object(obj, word_sz);
+    add_to_undo_waste(word_sz);
+  } else {
+    assert(buf->contains(obj + word_sz - 1), "should contain whole object");
+    buf->undo_allocation(obj, word_sz);
+  }
+}
+
+HeapWord* G1ParScanThreadState::allocate(GCAllocPurpose purpose, size_t word_sz) {
+  // Try the buffer matching the purpose first; only survivor allocations
+  // are aligned to SurvivorAlignmentInBytes.
+  HeapWord* const from_buffer = (purpose == GCAllocForSurvived)
+    ? alloc_buffer(GCAllocForSurvived)->allocate_aligned(word_sz, SurvivorAlignmentInBytes)
+    : alloc_buffer(GCAllocForTenured)->allocate(word_sz);
+  if (from_buffer == NULL) {
+    // The buffer could not satisfy the request; take the slow path.
+    return allocate_slow(purpose, word_sz);
+  }
+  return from_buffer;
+}
+
+void G1ParScanThreadState::retire_alloc_buffers() {
+  // Retire every per-purpose buffer; its unused words are recorded as waste.
+  for (int purpose = 0; purpose < GCAllocPurposeCount; ++purpose) {
+    G1ParGCAllocBuffer* const buf = _alloc_buffers[purpose];
+    add_to_alloc_buffer_waste(buf->words_remaining());
+    buf->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)purpose),
+                                true /* end_of_gc */, false /* retain */);
+  }
+}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_HPP
+
+#include "gc_implementation/g1/dirtyCardQueue.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.hpp"
+#include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1OopClosures.hpp"
+#include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/shared/ageTable.hpp"
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+
+class HeapRegion;
+class outputStream;
+
+class G1ParScanThreadState : public StackObj {
+ private:
+  G1CollectedHeap* _g1h;
+  RefToScanQueue*  _refs;
+  DirtyCardQueue   _dcq;
+  G1SATBCardTableModRefBS* _ct_bs;
+  G1RemSet* _g1_rem;
+
+  G1ParGCAllocBuffer  _surviving_alloc_buffer;
+  G1ParGCAllocBuffer  _tenured_alloc_buffer;
+  G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount]; // indexed by GCAllocPurpose, see alloc_buffer()
+  ageTable            _age_table;
+
+  G1ParScanClosure    _scanner;
+
+  size_t           _alloc_buffer_waste; // in words; accumulated by add_to_alloc_buffer_waste()
+  size_t           _undo_waste;         // in words; accumulated by add_to_undo_waste()
+
+  OopsInHeapRegionClosure*      _evac_failure_cl;
+
+  int  _hash_seed;
+  uint _queue_num;
+
+  size_t _term_attempts; // incremented by note_term_attempt()
+
+  double _start;              // reference point for elapsed_time()
+  double _start_strong_roots; // os::elapsedTime() taken in start_strong_roots()
+  double _strong_roots_time;  // accumulated by end_strong_roots()
+  double _start_term;         // os::elapsedTime() taken in start_term_time()
+  double _term_time;          // accumulated by end_term_time()
+
+  // Map from young-age-index (0 == not young, 1 is youngest) to
+  // surviving words. The base is what we get back from the malloc call.
+  size_t* _surviving_young_words_base;
+  // This points into the array, as we use the first few entries for padding.
+  size_t* _surviving_young_words;
+
+#define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t))
+
+  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
+
+  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
+
+  DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
+  G1SATBCardTableModRefBS* ctbs()                { return _ct_bs; }
+
+  template <class T> inline void immediate_rs_update(HeapRegion* from, T* p, int tid);
+
+  template <class T> void deferred_rs_update(HeapRegion* from, T* p, int tid) {
+    // If the new value of the field points into the "from" region itself, or
+    // "from" is a survivor region, no card needs to be enqueued for RSet update.
+    if (!from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) && !from->is_survivor()) {
+      size_t card_index = ctbs()->index_for(p);
+      // If the card hasn't been added to the buffer, do it.
+      if (ctbs()->mark_card_deferred(card_index)) {
+        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
+      }
+    }
+  }
+
+ public:
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp);
+  ~G1ParScanThreadState();
+
+  ageTable*         age_table()       { return &_age_table;       }
+
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+    return _alloc_buffers[purpose];
+  }
+
+  size_t alloc_buffer_waste() const              { return _alloc_buffer_waste; }
+  size_t undo_waste() const                      { return _undo_waste; }
+
+#ifdef ASSERT
+  bool queue_is_empty() const { return _refs->is_empty(); }
+
+  bool verify_ref(narrowOop* ref) const;
+  bool verify_ref(oop* ref) const;
+  bool verify_task(StarTask ref) const;
+#endif // ASSERT
+
+  template <class T> void push_on_queue(T* ref) {
+    assert(verify_ref(ref), "sanity");
+    _refs->push(ref);
+  }
+
+  template <class T> inline void update_rs(HeapRegion* from, T* p, int tid);
+
+ private:
+
+  inline HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz);
+  inline HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz);
+  inline void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz);
+
+ public:
+
+  void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
+    _evac_failure_cl = evac_failure_cl;
+  }
+
+  OopsInHeapRegionClosure* evac_failure_closure() { return _evac_failure_cl; }
+
+  int* hash_seed() { return &_hash_seed; }
+  uint queue_num() { return _queue_num; }
+
+  size_t term_attempts() const  { return _term_attempts; }
+  void note_term_attempt() { _term_attempts++; }
+
+  void start_strong_roots() {
+    _start_strong_roots = os::elapsedTime();
+  }
+  void end_strong_roots() {
+    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
+  }
+  double strong_roots_time() const { return _strong_roots_time; }
+
+  void start_term_time() {
+    note_term_attempt();
+    _start_term = os::elapsedTime();
+  }
+  void end_term_time() {
+    _term_time += (os::elapsedTime() - _start_term);
+  }
+  double term_time() const { return _term_time; }
+
+  double elapsed_time() const {
+    return os::elapsedTime() - _start;
+  }
+
+  static void print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
+  void print_termination_stats(int i, outputStream* const st = gclog_or_tty) const;
+
+  size_t* surviving_young_words() {
+    // Returns the pointer into the array past the leading padding entries
+    // (see _surviving_young_words_base); index 0 is the "not young" slot.
+    return _surviving_young_words;
+  }
+
+ private:
+  void retire_alloc_buffers();
+
+  #define G1_PARTIAL_ARRAY_MASK 0x2
+
+  inline bool has_partial_array_mask(oop* ref) const {
+    return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK;
+  }
+
+  // We never encode partial array oops as narrowOop*, so return false immediately.
+  // This allows the compiler to create optimized code when popping references from
+  // the work queue.
+  inline bool has_partial_array_mask(narrowOop* ref) const {
+    assert(((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) != G1_PARTIAL_ARRAY_MASK, "Partial array oop reference encoded as narrowOop*");
+    return false;
+  }
+
+  // Only implement set_partial_array_mask() for regular oops, not for narrowOops.
+  // We always encode partial arrays as regular oop*, to allow the
+  // narrowOop* overload of has_partial_array_mask() above.
+  // This means that any unintentional use of this method with narrowOops is caught
+  // by the compiler.
+  inline oop* set_partial_array_mask(oop obj) const {
+    assert(((uintptr_t)(void *)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
+    return (oop*) ((uintptr_t)(void *)obj | G1_PARTIAL_ARRAY_MASK);
+  }
+
+  inline oop clear_partial_array_mask(oop* ref) const {
+    return cast_to_oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK);
+  }
+
+  inline void do_oop_partial_array(oop* p);
+
+  // This method is applied to the fields of the objects that have just been copied.
+  template <class T> inline void do_oop_evac(T* p, HeapRegion* from);
+
+  template <class T> inline void deal_with_reference(T* ref_to_scan);
+
+  inline void dispatch_reference(StarTask ref);
+ public:
+
+  oop copy_to_survivor_space(oop const obj);
+
+  void trim_queue();
+
+  inline void steal_and_trim_queue(RefToScanQueueSet *task_queues);
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_INLINE_HPP
+
+#include "gc_implementation/g1/g1ParScanThreadState.hpp"
+#include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "oops/oop.inline.hpp"
+
+template <class T> inline void G1ParScanThreadState::immediate_rs_update(HeapRegion* from, T* p, int tid) {
+  // Nothing is recorded when "from" is a survivor region.
+  if (from->is_survivor()) return;
+  _g1_rem->par_write_ref(from, p, tid);
+}
+
+template <class T> void G1ParScanThreadState::update_rs(HeapRegion* from, T* p, int tid) {
+  // G1DeferredRSUpdate selects between the deferred and the immediate update path.
+  if (!G1DeferredRSUpdate) {
+    immediate_rs_update(from, p, tid);
+    return;
+  }
+  deferred_rs_update(from, p, tid);
+}
+
+template <class T> void G1ParScanThreadState::do_oop_evac(T* p, HeapRegion* from) {
+  // Resolves the referent of *p: in-cset objects are replaced by their forwardee
+  // (copying first if needed), humongous ones are flagged live; then calls update_rs().
+  assert(!oopDesc::is_null(oopDesc::load_decode_heap_oop(p)),
+         "Reference should not be NULL here as such are never pushed to the task queue.");
+  oop referent = oopDesc::load_decode_heap_oop_not_null(p);
+
+  // References outside the collection set are never pushed on purpose, but
+  // (benign) races in the card-claim mechanism during RSet scanning can make
+  // several threads process the same card, so the state is re-checked here.
+  const G1CollectedHeap::in_cset_state_t state = _g1h->in_cset_state(referent);
+  switch (state) {
+    case G1CollectedHeap::InCSet: {
+      oop target = referent->is_forwarded() ? referent->forwardee()
+                                            : copy_to_survivor_space(referent);
+      oopDesc::encode_store_heap_oop(p, target);
+      break;
+    }
+    case G1CollectedHeap::IsHumongous:
+      _g1h->set_humongous_is_live(referent);
+      break;
+    default:
+      assert(state == G1CollectedHeap::InNeither,
+             err_msg("In_cset_state must be InNeither here, but is %d", state));
+  }
+  assert(referent != NULL, "Must be");
+  update_rs(from, p, queue_num());
+}
+
+inline void G1ParScanThreadState::do_oop_partial_array(oop* p) {
+  assert(has_partial_array_mask(p), "invariant");
+  oop from_obj = clear_partial_array_mask(p); // strip the tag to recover the object
+  // p was a tagged from-space objArray: scan the next chunk of its to-space copy.
+  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
+  assert(from_obj->is_objArray(), "must be obj array");
+  objArrayOop from_obj_array = objArrayOop(from_obj);
+  // The from-space object contains the real length.
+  int length                 = from_obj_array->length();
+
+  assert(from_obj->is_forwarded(), "must be forwarded");
+  oop to_obj                 = from_obj->forwardee();
+  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
+  objArrayOop to_obj_array   = objArrayOop(to_obj);
+  // We keep track of the next start index in the length field of the
+  // to-space object.
+  int next_index             = to_obj_array->length();
+  assert(0 <= next_index && next_index < length,
+         err_msg("invariant, next index: %d, length: %d", next_index, length));
+
+  int start                  = next_index;
+  int end                    = length;
+  int remainder              = end - start;
+  // Only split when more than 2 * ParGCArrayScanChunk elements remain, so no pushed range is too small.
+  if (remainder > 2 * ParGCArrayScanChunk) {
+    end = start + ParGCArrayScanChunk;
+    to_obj_array->set_length(end); // record where the next chunk starts
+    // Push the remainder before we process the range in case another
+    // worker has run out of things to do and can steal it.
+    oop* from_obj_p = set_partial_array_mask(from_obj);
+    push_on_queue(from_obj_p);
+  } else {
+    assert(length == end, "sanity");
+    // We'll process the final range for this object. Restore the length
+    // so that the heap remains parsable in case of evacuation failure.
+    to_obj_array->set_length(end);
+  }
+  _scanner.set_region(_g1h->heap_region_containing_raw(to_obj));
+  // Process indexes [start,end). It will also process the header
+  // along with the first chunk (i.e., the chunk with start == 0).
+  // Note that at this point the length field of to_obj_array is not
+  // correct given that we are using it to keep track of the next
+  // start index. oop_iterate_range() (thankfully!) ignores the length
+  // field and only relies on the start / end parameters.  It does
+  // however return the size of the object which will be incorrect. So
+  // we have to ignore it even if we wanted to use it.
+  to_obj_array->oop_iterate_range(&_scanner, start, end);
+}
+
+template <class T> inline void G1ParScanThreadState::deal_with_reference(T* ref_to_scan) {
+  if (has_partial_array_mask(ref_to_scan)) {
+    // Tagged entry: hand it to the partial-array path.
+    do_oop_partial_array((oop*)ref_to_scan);
+    return;
+  }
+  // Untagged entry: look up the region containing "ref_to_scan" with the "raw"
+  // variant of heap_region_containing and process the reference against it.
+  HeapRegion* const containing = _g1h->heap_region_containing_raw(ref_to_scan);
+  do_oop_evac(ref_to_scan, containing);
+}
+
+inline void G1ParScanThreadState::dispatch_reference(StarTask ref) {
+  // Route the task to the deal_with_reference() instantiation matching its width.
+  assert(verify_task(ref), "sanity");
+  if (!ref.is_narrow()) {
+    deal_with_reference((oop*)ref);
+    return;
+  }
+  deal_with_reference((narrowOop*)ref);
+}
+
+void G1ParScanThreadState::steal_and_trim_queue(RefToScanQueueSet *task_queues) {
+  // Repeatedly steal a task from the queue set and process it; the loop ends
+  // as soon as a steal attempt fails.
+  for (StarTask task; task_queues->steal(queue_num(), hash_seed(), task); ) {
+    assert(verify_task(task), "sanity");
+    dispatch_reference(task);
+
+    // Processing the stolen reference may have made new entries available
+    // on the queues, so drain them as necessary before stealing again.
+    trim_queue();
+  }
+}
+
+#endif /* SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_INLINE_HPP */
+
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1RemSet.cpp
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -23,7 +23,6 @@
  */
 
 #include "precompiled.hpp"
-#include "gc_implementation/g1/bufferingOopClosure.hpp"
 #include "gc_implementation/g1/concurrentG1Refine.hpp"
 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1RemSet.hpp
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
--- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_INLINE_HPP
 
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "oops/oop.inline.hpp"
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -27,6 +27,7 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "gc_implementation/g1/satbQueue.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/thread.inline.hpp"
 
 G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap,
@@ -64,6 +65,17 @@
   }
 }
 
+void G1SATBCardTableModRefBS::write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
+  // No pre-barrier work is done when the destination is flagged as uninitialized.
+  if (dest_uninitialized) return;
+  write_ref_array_pre_work(dst, count);
+}
+void G1SATBCardTableModRefBS::write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
+  // No pre-barrier work is done when the destination is flagged as uninitialized.
+  if (dest_uninitialized) return;
+  write_ref_array_pre_work(dst, count);
+}
+
 bool G1SATBCardTableModRefBS::mark_card_deferred(size_t card_index) {
   jbyte val = _byte_map[card_index];
   // It's already processed
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -86,16 +86,8 @@
   }
 
   template <class T> void write_ref_array_pre_work(T* dst, int count);
-  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
-    if (!dest_uninitialized) {
-      write_ref_array_pre_work(dst, count);
-    }
-  }
-  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
-    if (!dest_uninitialized) {
-      write_ref_array_pre_work(dst, count);
-    }
-  }
+  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized);
+  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized);
 
 /*
    Claimed and deferred bits are used together in G1 during the evacuation
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp
--- a/src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1StringDedupThread.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -77,38 +77,37 @@
       break;
     }
 
-    // Include this thread in safepoints
-    stsJoin();
+    {
+      // Include thread in safepoints
+      SuspendibleThreadSetJoiner sts;
 
-    stat.mark_exec();
+      stat.mark_exec();
 
-    // Process the queue
-    for (;;) {
-      oop java_string = G1StringDedupQueue::pop();
-      if (java_string == NULL) {
-        break;
+      // Process the queue
+      for (;;) {
+        oop java_string = G1StringDedupQueue::pop();
+        if (java_string == NULL) {
+          break;
+        }
+
+        G1StringDedupTable::deduplicate(java_string, stat);
+
+        // Safepoint this thread if needed
+        if (sts.should_yield()) {
+          stat.mark_block();
+          sts.yield();
+          stat.mark_unblock();
+        }
       }
 
-      G1StringDedupTable::deduplicate(java_string, stat);
+      G1StringDedupTable::trim_entry_cache();
 
-      // Safepoint this thread if needed
-      if (stsShouldYield()) {
-        stat.mark_block();
-        stsYield(NULL);
-        stat.mark_unblock();
-      }
-    }
+      stat.mark_done();
 
-    G1StringDedupTable::trim_entry_cache();
-
-    stat.mark_done();
-
-    // Print statistics
-    total_stat.add(stat);
-    print(gclog_or_tty, stat, total_stat);
-
-    // Exclude this thread from safepoints
-    stsLeave();
+      // Print statistics
+      total_stat.add(stat);
+      print(gclog_or_tty, stat, total_stat);
+    }
   }
 
   terminate();
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1_globals.hpp
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -289,6 +289,13 @@
           "The amount of code root chunks that should be kept at most "     \
           "as percentage of already allocated.")                            \
                                                                             \
+  experimental(bool, G1ReclaimDeadHumongousObjectsAtYoungGC, true,          \
+          "Try to reclaim dead large objects at every young GC.")           \
+                                                                            \
+  experimental(bool, G1TraceReclaimDeadHumongousObjectsAtYoungGC, false,    \
+          "Print some information about large object liveness "             \
+          "at every young GC.")                                             \
+                                                                            \
   experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
           "An upper bound for the number of old CSet regions expressed "    \
           "as a percentage of the heap size.")                              \
@@ -325,11 +332,14 @@
           "evacuation pauses")                                              \
                                                                             \
   diagnostic(bool, G1VerifyRSetsDuringFullGC, false,                        \
-             "If true, perform verification of each heap region's "         \
-             "remembered set when verifying the heap during a full GC.")    \
+          "If true, perform verification of each heap region's "            \
+          "remembered set when verifying the heap during a full GC.")       \
                                                                             \
   diagnostic(bool, G1VerifyHeapRegionCodeRoots, false,                      \
-             "Verify the code root lists attached to each heap region.")
+          "Verify the code root lists attached to each heap region.")       \
+                                                                            \
+  develop(bool, G1VerifyBitmaps, false,                                     \
+          "Verifies the consistency of the marking bitmaps")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -30,14 +30,21 @@
 // non-virtually, using a mechanism defined in this file.  Extend these
 // macros in the obvious way to add specializations for new closures.
 
-// Forward declarations.
 enum G1Barrier {
   G1BarrierNone,
   G1BarrierEvac,
   G1BarrierKlass
 };
 
-template<G1Barrier barrier, bool do_mark_object>
+enum G1Mark {
+  G1MarkNone,
+  G1MarkFromRoot,
+  G1MarkPromotedFromRoot
+};
+
+// Forward declarations.
+
+template<G1Barrier barrier, G1Mark do_mark_object>
 class G1ParCopyClosure;
 
 class G1ParScanClosure;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/heapRegion.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -30,9 +30,12 @@
 #include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
+#include "gc_implementation/shared/liveRange.hpp"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/iterator.hpp"
+#include "memory/space.inline.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
@@ -46,7 +49,7 @@
                                  HeapRegion* hr, ExtendedOopClosure* cl,
                                  CardTableModRefBS::PrecisionStyle precision,
                                  FilterKind fk) :
-  ContiguousSpaceDCTOC(hr, cl, precision, NULL),
+  DirtyCardToOopClosure(hr, cl, precision, NULL),
   _hr(hr), _fk(fk), _g1(g1) { }
 
 FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
@@ -58,7 +61,7 @@
                                HeapRegion* hr,
                                HeapWord* cur, HeapWord* top) {
   oop cur_oop = oop(cur);
-  int oop_size = cur_oop->size();
+  size_t oop_size = hr->block_size(cur);
   HeapWord* next_obj = cur + oop_size;
   while (next_obj < top) {
     // Keep filtering the remembered set.
@@ -69,25 +72,24 @@
     }
     cur = next_obj;
     cur_oop = oop(cur);
-    oop_size = cur_oop->size();
+    oop_size = hr->block_size(cur);
     next_obj = cur + oop_size;
   }
   return cur;
 }
 
-void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr,
-                                              HeapWord* bottom,
-                                              HeapWord* top,
-                                              ExtendedOopClosure* cl) {
+void HeapRegionDCTOC::walk_mem_region(MemRegion mr,
+                                      HeapWord* bottom,
+                                      HeapWord* top) {
   G1CollectedHeap* g1h = _g1;
-  int oop_size;
+  size_t oop_size;
   ExtendedOopClosure* cl2 = NULL;
 
-  FilterIntoCSClosure intoCSFilt(this, g1h, cl);
-  FilterOutOfRegionClosure outOfRegionFilt(_hr, cl);
+  FilterIntoCSClosure intoCSFilt(this, g1h, _cl);
+  FilterOutOfRegionClosure outOfRegionFilt(_hr, _cl);
 
   switch (_fk) {
-  case NoFilterKind:          cl2 = cl; break;
+  case NoFilterKind:          cl2 = _cl; break;
   case IntoCSFilterKind:      cl2 = &intoCSFilt; break;
   case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
   default:                    ShouldNotReachHere();
@@ -100,7 +102,7 @@
   if (!g1h->is_obj_dead(oop(bottom), _hr)) {
     oop_size = oop(bottom)->oop_iterate(cl2, mr);
   } else {
-    oop_size = oop(bottom)->size();
+    oop_size = _hr->block_size(bottom);
   }
 
   bottom += oop_size;
@@ -109,17 +111,17 @@
     // We replicate the loop below for several kinds of possible filters.
     switch (_fk) {
     case NoFilterKind:
-      bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top);
+      bottom = walk_mem_region_loop(_cl, g1h, _hr, bottom, top);
       break;
 
     case IntoCSFilterKind: {
-      FilterIntoCSClosure filt(this, g1h, cl);
+      FilterIntoCSClosure filt(this, g1h, _cl);
       bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
       break;
     }
 
     case OutOfRegionFilterKind: {
-      FilterOutOfRegionClosure filt(_hr, cl);
+      FilterOutOfRegionClosure filt(_hr, _cl);
       bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
       break;
     }
@@ -372,50 +374,13 @@
   // region.
   hr_clear(false /*par*/, false /*clear_space*/);
   set_top(bottom());
-  set_saved_mark();
+  record_top_and_timestamp();
 
   assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
 }
 
 CompactibleSpace* HeapRegion::next_compaction_space() const {
-  // We're not using an iterator given that it will wrap around when
-  // it reaches the last region and this is not what we want here.
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  uint index = hrs_index() + 1;
-  while (index < g1h->n_regions()) {
-    HeapRegion* hr = g1h->region_at(index);
-    if (!hr->isHumongous()) {
-      return hr;
-    }
-    index += 1;
-  }
-  return NULL;
-}
-
-void HeapRegion::save_marks() {
-  set_saved_mark();
-}
-
-void HeapRegion::oops_in_mr_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  HeapWord* p = mr.start();
-  HeapWord* e = mr.end();
-  oop obj;
-  while (p < e) {
-    obj = oop(p);
-    p += obj->oop_iterate(cl);
-  }
-  assert(p == e, "bad memregion: doesn't end on obj boundary");
-}
-
-#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \
-void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \
-  ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl);              \
-}
-SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN)
-
-
-void HeapRegion::oop_before_save_marks_iterate(ExtendedOopClosure* cl) {
-  oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
+  return G1CollectedHeap::heap()->next_compaction_region(this);
 }
 
 void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
@@ -423,7 +388,6 @@
   // We always recreate the prev marking info and we'll explicitly
   // mark all objects we find to be self-forwarded on the prev
   // bitmap. So all objects need to be below PTAMS.
-  _prev_top_at_mark_start = top();
   _prev_marked_bytes = 0;
 
   if (during_initial_mark) {
@@ -447,6 +411,7 @@
   assert(0 <= marked_bytes && marked_bytes <= used(),
          err_msg("marked: "SIZE_FORMAT" used: "SIZE_FORMAT,
                  marked_bytes, used()));
+  _prev_top_at_mark_start = top();
   _prev_marked_bytes = marked_bytes;
 }
 
@@ -477,7 +442,7 @@
     if (cl->abort()) return cur;
     // The check above must occur before the operation below, since an
     // abort might invalidate the "size" operation.
-    cur += obj->size();
+    cur += block_size(cur);
   }
   return NULL;
 }
@@ -549,7 +514,7 @@
       return cur;
     }
     // Otherwise...
-    next = (cur + obj->size());
+    next = cur + block_size(cur);
   }
 
   // If we finish the above loop...We have a parseable object that
@@ -557,10 +522,9 @@
   // inside or spans the entire region.
 
   assert(obj == oop(cur), "sanity");
-  assert(cur <= start &&
-         obj->klass_or_null() != NULL &&
-         (cur + obj->size()) > start,
-         "Loop postcondition");
+  assert(cur <= start, "Loop postcondition");
+  assert(obj->klass_or_null() != NULL, "Loop postcondition");
+  assert((cur + block_size(cur)) > start, "Loop postcondition");
 
   if (!g1h->is_obj_dead(obj)) {
     obj->oop_iterate(cl, mr);
@@ -574,7 +538,7 @@
     };
 
     // Otherwise:
-    next = (cur + obj->size());
+    next = cur + block_size(cur);
 
     if (!g1h->is_obj_dead(obj)) {
       if (next < end || !obj->is_objArray()) {
@@ -929,10 +893,11 @@
   size_t object_num = 0;
   while (p < top()) {
     oop obj = oop(p);
-    size_t obj_size = obj->size();
+    size_t obj_size = block_size(p);
     object_num += 1;
 
-    if (is_humongous != g1->isHumongous(obj_size)) {
+    if (is_humongous != g1->isHumongous(obj_size) &&
+        !g1->is_obj_dead(obj, this)) { // Dead objects may have bigger block_size since they span several objects.
       gclog_or_tty->print_cr("obj "PTR_FORMAT" is of %shumongous size ("
                              SIZE_FORMAT" words) in a %shumongous region",
                              p, g1->isHumongous(obj_size) ? "" : "non-",
@@ -943,7 +908,9 @@
 
     // If it returns false, verify_for_object() will output the
     // appropriate messasge.
-    if (do_bot_verify && !_offsets.verify_for_object(p, obj_size)) {
+    if (do_bot_verify &&
+        !g1->is_obj_dead(obj, this) &&
+        !_offsets.verify_for_object(p, obj_size)) {
       *failures = true;
       return;
     }
@@ -951,7 +918,10 @@
     if (!g1->is_obj_dead_cond(obj, this, vo)) {
       if (obj->is_oop()) {
         Klass* klass = obj->klass();
-        if (!klass->is_metaspace_object()) {
+        bool is_metaspace_object = Metaspace::contains(klass) ||
+                                   (vo == VerifyOption_G1UsePrevMarking &&
+                                   ClassLoaderDataGraph::unload_list_contains(klass));
+        if (!is_metaspace_object) {
           gclog_or_tty->print_cr("klass "PTR_FORMAT" of object "PTR_FORMAT" "
                                  "not metadata", klass, (void *)obj);
           *failures = true;
@@ -1065,7 +1035,9 @@
 // away eventually.
 
 void G1OffsetTableContigSpace::clear(bool mangle_space) {
-  ContiguousSpace::clear(mangle_space);
+  set_top(bottom());
+  set_saved_mark_word(bottom());
+  CompactibleSpace::clear(mangle_space);
   _offsets.zero_bottom_entry();
   _offsets.initialize_threshold();
 }
@@ -1103,10 +1075,10 @@
   if (_gc_time_stamp < g1h->get_gc_time_stamp())
     return top();
   else
-    return ContiguousSpace::saved_mark_word();
+    return Space::saved_mark_word();
 }
 
-void G1OffsetTableContigSpace::set_saved_mark() {
+void G1OffsetTableContigSpace::record_top_and_timestamp() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();
 
@@ -1118,7 +1090,7 @@
     // of region. If it does so after _gc_time_stamp = ..., then it
     // will pick up the right saved_mark_word() as the high water mark
     // of the region. Either way, the behaviour will be correct.
-    ContiguousSpace::set_saved_mark();
+    Space::set_saved_mark_word(top());
     OrderAccess::storestore();
     _gc_time_stamp = curr_gc_time_stamp;
     // No need to do another barrier to flush the writes above. If
@@ -1129,6 +1101,26 @@
   }
 }
 
+void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) {
+  object_iterate(blk);
+}
+
+void G1OffsetTableContigSpace::object_iterate(ObjectClosure* blk) {
+  HeapWord* p = bottom();
+  while (p < top()) {
+    if (block_is_obj(p)) {
+      blk->do_object(oop(p));
+    }
+    p += block_size(p);
+  }
+}
+
+#define block_is_always_obj(q) true
+void G1OffsetTableContigSpace::prepare_for_compaction(CompactPoint* cp) {
+  SCAN_AND_FORWARD(cp, top, block_is_always_obj, block_size);
+}
+#undef block_is_always_obj
+
 G1OffsetTableContigSpace::
 G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
                          MemRegion mr) :
@@ -1138,7 +1130,8 @@
 {
   _offsets.set_space(this);
   // false ==> we'll do the clearing if there's clearing to be done.
-  ContiguousSpace::initialize(mr, false, SpaceDecorator::Mangle);
+  CompactibleSpace::initialize(mr, false, SpaceDecorator::Mangle);
+  _top = bottom();
   _offsets.zero_bottom_entry();
   _offsets.initialize_threshold();
 }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/heapRegion.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_HPP
 
-#include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/g1BlockOffsetTable.hpp"
 #include "gc_implementation/g1/g1_specialized_oop_closures.hpp"
 #include "gc_implementation/g1/survRateGroup.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
@@ -46,8 +46,6 @@
 // The solution is to remove this method from the definition
 // of a Space.
 
-class CompactibleSpace;
-class ContiguousSpace;
 class HeapRegionRemSet;
 class HeapRegionRemSetIterator;
 class HeapRegion;
@@ -71,7 +69,7 @@
 // in the concurrent marker used by G1 to filter remembered
 // sets.
 
-class HeapRegionDCTOC : public ContiguousSpaceDCTOC {
+class HeapRegionDCTOC : public DirtyCardToOopClosure {
 public:
   // Specification of possible DirtyCardToOopClosure filtering.
   enum FilterKind {
@@ -85,39 +83,13 @@
   FilterKind _fk;
   G1CollectedHeap* _g1;
 
-  void walk_mem_region_with_cl(MemRegion mr,
-                               HeapWord* bottom, HeapWord* top,
-                               ExtendedOopClosure* cl);
-
-  // We don't specialize this for FilteringClosure; filtering is handled by
-  // the "FilterKind" mechanism.  But we provide this to avoid a compiler
-  // warning.
-  void walk_mem_region_with_cl(MemRegion mr,
-                               HeapWord* bottom, HeapWord* top,
-                               FilteringClosure* cl) {
-    HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top,
-                                             (ExtendedOopClosure*)cl);
-  }
-
-  // Get the actual top of the area on which the closure will
-  // operate, given where the top is assumed to be (the end of the
-  // memory region passed to do_MemRegion) and where the object
-  // at the top is assumed to start. For example, an object may
-  // start at the top but actually extend past the assumed top,
-  // in which case the top becomes the end of the object.
-  HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) {
-    return ContiguousSpaceDCTOC::get_actual_top(top, top_obj);
-  }
-
   // Walk the given memory region from bottom to (actual) top
   // looking for objects and applying the oop closure (_cl) to
   // them. The base implementation of this treats the area as
   // blocks, where a block may or may not be an object. Sub-
   // classes should override this to provide more accurate
   // or possibly more efficient walking.
-  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) {
-    Filtering_DCTOC::walk_mem_region(mr, bottom, top);
-  }
+  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top);
 
 public:
   HeapRegionDCTOC(G1CollectedHeap* g1,
@@ -151,9 +123,9 @@
 // the regions anyway) and at the end of a Full GC. The current scheme
 // that uses sequential unsigned ints will fail only if we have 4b
 // evacuation pauses between two cleanups, which is _highly_ unlikely.
-
-class G1OffsetTableContigSpace: public ContiguousSpace {
+class G1OffsetTableContigSpace: public CompactibleSpace {
   friend class VMStructs;
+  HeapWord* _top;
  protected:
   G1BlockOffsetArrayContigSpace _offsets;
   Mutex _par_alloc_lock;
@@ -170,11 +142,32 @@
   G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
                            MemRegion mr);
 
+  void set_top(HeapWord* value) { _top = value; }
+  HeapWord* top() const { return _top; }
+
+ protected:
+  HeapWord** top_addr() { return &_top; }
+  // Allocation helpers (return NULL if full).
+  inline HeapWord* allocate_impl(size_t word_size, HeapWord* end_value);
+  inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value);
+
+ public:
+  void reset_after_compaction() { set_top(compaction_top()); }
+
+  size_t used() const { return byte_size(bottom(), top()); }
+  size_t free() const { return byte_size(top(), end()); }
+  bool is_free_block(const HeapWord* p) const { return p >= top(); }
+
+  MemRegion used_region() const { return MemRegion(bottom(), top()); }
+
+  void object_iterate(ObjectClosure* blk);
+  void safe_object_iterate(ObjectClosure* blk);
+
   void set_bottom(HeapWord* value);
   void set_end(HeapWord* value);
 
   virtual HeapWord* saved_mark_word() const;
-  virtual void set_saved_mark();
+  void record_top_and_timestamp();
   void reset_gc_time_stamp() { _gc_time_stamp = 0; }
   unsigned get_gc_time_stamp() { return _gc_time_stamp; }
 
@@ -194,6 +187,8 @@
   HeapWord* block_start(const void* p);
   HeapWord* block_start_const(const void* p) const;
 
+  void prepare_for_compaction(CompactPoint* cp);
+
   // Add offset table update.
   virtual HeapWord* allocate(size_t word_size);
   HeapWord* par_allocate(size_t word_size);
@@ -228,10 +223,6 @@
     ContinuesHumongous
   };
 
-  // Requires that the region "mr" be dense with objects, and begin and end
-  // with an object.
-  void oops_in_mr_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // The remembered set for this region.
   // (Might want to make this "inline" later, to avoid some alloc failure
   // issues.)
@@ -256,11 +247,9 @@
   bool _evacuation_failed;
 
   // A heap region may be a member one of a number of special subsets, each
-  // represented as linked lists through the field below.  Currently, these
-  // sets include:
+  // represented as linked lists through the field below.  Currently, there
+  // is only one set:
   //   The collection set.
-  //   The set of allocation regions used in a collection pause.
-  //   Spaces that may contain gray objects.
   HeapRegion* _next_in_special_set;
 
   // next region in the young "generation" region set
@@ -379,14 +368,15 @@
     ParMarkRootClaimValue      = 9
   };
 
-  inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
-    assert(is_young(), "we can only skip BOT updates on young regions");
-    return ContiguousSpace::par_allocate(word_size);
-  }
-  inline HeapWord* allocate_no_bot_updates(size_t word_size) {
-    assert(is_young(), "we can only skip BOT updates on young regions");
-    return ContiguousSpace::allocate(word_size);
-  }
+  // All allocated blocks are occupied by objects in a HeapRegion
+  bool block_is_obj(const HeapWord* p) const;
+
+  // Returns the object size for all valid block starts
+  // and the amount of unallocated words if called on top()
+  size_t block_size(const HeapWord* p) const;
+
+  inline HeapWord* par_allocate_no_bot_updates(size_t word_size);
+  inline HeapWord* allocate_no_bot_updates(size_t word_size);
 
   // If this region is a member of a HeapRegionSeq, the index in that
   // sequence, otherwise -1.
@@ -595,9 +585,6 @@
 
   HeapWord* orig_end() { return _orig_end; }
 
-  // Allows logical separation between objects allocated before and after.
-  void save_marks();
-
   // Reset HR stuff to default values.
   void hr_clear(bool par, bool clear_space, bool locked = false);
   void par_clear();
@@ -606,10 +593,6 @@
   HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; }
   HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; }
 
-  // Apply "cl->do_oop" to (the addresses of) all reference fields in objects
-  // allocated in the current region before the last call to "save_mark".
-  void oop_before_save_marks_iterate(ExtendedOopClosure* cl);
-
   // Note the start or end of marking. This tells the heap region
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
@@ -795,10 +778,6 @@
     _predicted_bytes_to_copy = bytes;
   }
 
-#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix)  \
-  virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl);
-  SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL)
-
   virtual CompactibleSpace* next_compaction_space() const;
 
   virtual void reset_after_compaction();
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,8 +25,49 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
 
+#include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "memory/space.hpp"
+#include "runtime/atomic.inline.hpp"
+
+// This version requires locking.
+inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t size,
+                                                HeapWord* const end_value) {
+  HeapWord* obj = top();
+  if (pointer_delta(end_value, obj) >= size) {
+    HeapWord* new_top = obj + size;
+    set_top(new_top);
+    assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+    return obj;
+  } else {
+    return NULL;
+  }
+}
+
+// This version is lock-free.
+inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t size,
+                                                    HeapWord* const end_value) {
+  do {
+    HeapWord* obj = top();
+    if (pointer_delta(end_value, obj) >= size) {
+      HeapWord* new_top = obj + size;
+      HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj);
+      // result can be one of two:
+      //  the old top value: the exchange succeeded
+      //  otherwise: the new value of the top is returned.
+      if (result == obj) {
+        assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+        return obj;
+      }
+    } else {
+      return NULL;
+    }
+  } while (true);
+}
+
 inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
-  HeapWord* res = ContiguousSpace::allocate(size);
+  HeapWord* res = allocate_impl(size, end());
   if (res != NULL) {
     _offsets.alloc_block(res, size);
   }
@@ -38,12 +79,7 @@
 // this is used for larger LAB allocations only.
 inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
   MutexLocker x(&_par_alloc_lock);
-  // Given that we take the lock no need to use par_allocate() here.
-  HeapWord* res = ContiguousSpace::allocate(size);
-  if (res != NULL) {
-    _offsets.alloc_block(res, size);
-  }
-  return res;
+  return allocate(size);
 }
 
 inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
@@ -55,6 +91,52 @@
   return _offsets.block_start_const(p);
 }
 
+inline bool
+HeapRegion::block_is_obj(const HeapWord* p) const {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  if (ClassUnloadingWithConcurrentMark) {
+    return !g1h->is_obj_dead(oop(p), this);
+  }
+  return p < top();
+}
+
+inline size_t
+HeapRegion::block_size(const HeapWord *addr) const {
+  if (addr == top()) {
+    return pointer_delta(end(), addr);
+  }
+
+  if (block_is_obj(addr)) {
+    return oop(addr)->size();
+  }
+
+  assert(ClassUnloadingWithConcurrentMark,
+      err_msg("All blocks should be objects if G1 Class Unloading isn't used. "
+              "HR: ["PTR_FORMAT", "PTR_FORMAT", "PTR_FORMAT") "
+              "addr: " PTR_FORMAT,
+              p2i(bottom()), p2i(top()), p2i(end()), p2i(addr)));
+
+  // Old regions' dead objects may have dead classes.
+  // We need to find the next live object in some other
+  // manner than getting the oop size.
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  HeapWord* next = g1h->concurrent_mark()->prevMarkBitMap()->
+      getNextMarkedWordAddress(addr, prev_top_at_mark_start());
+
+  assert(next > addr, "must get the next live object");
+  return pointer_delta(next, addr);
+}
+
+inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) {
+  assert(is_young(), "we can only skip BOT updates on young regions");
+  return par_allocate_impl(word_size, end());
+}
+
+inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t word_size) {
+  assert(is_young(), "we can only skip BOT updates on young regions");
+  return allocate_impl(word_size, end());
+}
+
 inline void HeapRegion::note_start_of_marking() {
   _next_marked_bytes = 0;
   _next_top_at_mark_start = top();
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -169,7 +169,7 @@
 
   // Mem size in bytes.
   size_t mem_size() const {
-    return sizeof(this) + _bm.size_in_words() * HeapWordSize;
+    return sizeof(PerRegionTable) + _bm.size_in_words() * HeapWordSize;
   }
 
   // Requires "from" to be in "hr()".
@@ -694,6 +694,9 @@
   clear_fcc();
 }
 
+bool OtherRegionsTable::is_empty() const {
+  return occ_sparse() == 0 && occ_coarse() == 0 && _first_all_fine_prts == NULL;
+}
 
 size_t OtherRegionsTable::occupied() const {
   size_t sum = occ_fine();
@@ -735,7 +738,7 @@
   sum += (sizeof(PerRegionTable*) * _max_fine_entries);
   sum += (_coarse_map.size_in_words() * HeapWordSize);
   sum += (_sparse_table.mem_size());
-  sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above.
+  sum += sizeof(OtherRegionsTable) - sizeof(_sparse_table); // Avoid double counting above.
   return sum;
 }
 
@@ -770,30 +773,6 @@
   clear_fcc();
 }
 
-void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) {
-  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
-  size_t hrs_ind = (size_t) from_hr->hrs_index();
-  size_t ind = hrs_ind & _mod_max_fine_entries_mask;
-  if (del_single_region_table(ind, from_hr)) {
-    assert(!_coarse_map.at(hrs_ind), "Inv");
-  } else {
-    _coarse_map.par_at_put(hrs_ind, 0);
-  }
-  // Check to see if any of the fcc entries come from here.
-  uint hr_ind = hr()->hrs_index();
-  for (uint tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
-    int fcc_ent = FromCardCache::at(tid, hr_ind);
-    if (fcc_ent != FromCardCache::InvalidCard) {
-      HeapWord* card_addr = (HeapWord*)
-        (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift);
-      if (hr()->is_in_reserved(card_addr)) {
-        // Clear the from card cache.
-        FromCardCache::set(tid, hr_ind, FromCardCache::InvalidCard);
-      }
-    }
-  }
-}
-
 bool OtherRegionsTable::del_single_region_table(size_t ind,
                                                 HeapRegion* hr) {
   assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
@@ -953,7 +932,10 @@
 
 void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) {
   assert(nm != NULL, "sanity");
-  _code_roots.remove(nm);
+  assert_locked_or_safepoint(CodeCache_lock);
+
+  _code_roots.remove_lock_free(nm);
+
   // Check that there were no duplicates
   guarantee(!_code_roots.contains(nm), "duplicate entry found");
 }
@@ -1048,20 +1030,16 @@
   return _code_roots.mem_size();
 }
 
-//-------------------- Iteration --------------------
-
 HeapRegionRemSetIterator:: HeapRegionRemSetIterator(HeapRegionRemSet* hrrs) :
   _hrrs(hrrs),
   _g1h(G1CollectedHeap::heap()),
   _coarse_map(&hrrs->_other_regions._coarse_map),
-  _fine_grain_regions(hrrs->_other_regions._fine_grain_regions),
   _bosa(hrrs->bosa()),
   _is(Sparse),
   // Set these values so that we increment to the first region.
   _coarse_cur_region_index(-1),
   _coarse_cur_region_cur_card(HeapRegion::CardsPerRegion-1),
-  _cur_region_cur_card(0),
-  _fine_array_index(-1),
+  _cur_card_in_prt(HeapRegion::CardsPerRegion),
   _fine_cur_prt(NULL),
   _n_yielded_coarse(0),
   _n_yielded_fine(0),
@@ -1093,58 +1071,59 @@
   return true;
 }
 
-void HeapRegionRemSetIterator::fine_find_next_non_null_prt() {
-  // Otherwise, find the next bucket list in the array.
-  _fine_array_index++;
-  while (_fine_array_index < (int) OtherRegionsTable::_max_fine_entries) {
-    _fine_cur_prt = _fine_grain_regions[_fine_array_index];
-    if (_fine_cur_prt != NULL) return;
-    else _fine_array_index++;
-  }
-  assert(_fine_cur_prt == NULL, "Loop post");
-}
-
 bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) {
   if (fine_has_next()) {
-    _cur_region_cur_card =
-      _fine_cur_prt->_bm.get_next_one_offset(_cur_region_cur_card + 1);
+    _cur_card_in_prt =
+      _fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1);
   }
-  while (!fine_has_next()) {
-    if (_cur_region_cur_card == (size_t) HeapRegion::CardsPerRegion) {
-      _cur_region_cur_card = 0;
-      _fine_cur_prt = _fine_cur_prt->collision_list_next();
+  if (_cur_card_in_prt == HeapRegion::CardsPerRegion) {
+    // _fine_cur_prt may still be NULL in case there are no PRTs at all for
+    // the remembered set.
+    if (_fine_cur_prt == NULL || _fine_cur_prt->next() == NULL) {
+      return false;
     }
-    if (_fine_cur_prt == NULL) {
-      fine_find_next_non_null_prt();
-      if (_fine_cur_prt == NULL) return false;
-    }
-    assert(_fine_cur_prt != NULL && _cur_region_cur_card == 0,
-           "inv.");
-    HeapWord* r_bot =
-      _fine_cur_prt->hr()->bottom();
-    _cur_region_card_offset = _bosa->index_for(r_bot);
-    _cur_region_cur_card = _fine_cur_prt->_bm.get_next_one_offset(0);
+    PerRegionTable* next_prt = _fine_cur_prt->next();
+    switch_to_prt(next_prt);
+    _cur_card_in_prt = _fine_cur_prt->_bm.get_next_one_offset(_cur_card_in_prt + 1);
   }
-  assert(fine_has_next(), "Or else we exited the loop via the return.");
-  card_index = _cur_region_card_offset + _cur_region_cur_card;
+
+  card_index = _cur_region_card_offset + _cur_card_in_prt;
+  guarantee(_cur_card_in_prt < HeapRegion::CardsPerRegion,
+            err_msg("Card index "SIZE_FORMAT" must be within the region", _cur_card_in_prt));
   return true;
 }
 
 bool HeapRegionRemSetIterator::fine_has_next() {
-  return
-    _fine_cur_prt != NULL &&
-    _cur_region_cur_card < HeapRegion::CardsPerRegion;
+  return _cur_card_in_prt != HeapRegion::CardsPerRegion;
+}
+
+void HeapRegionRemSetIterator::switch_to_prt(PerRegionTable* prt) {
+  assert(prt != NULL, "Cannot switch to NULL prt");
+  _fine_cur_prt = prt;
+
+  HeapWord* r_bot = _fine_cur_prt->hr()->bottom();
+  _cur_region_card_offset = _bosa->index_for(r_bot);
+
+  // The bitmap scan for the PRT always scans from _cur_card_in_prt + 1.
+  // To avoid special-casing this start case, and not miss the first bitmap
+  // entry, initialize _cur_card_in_prt with -1 instead of 0.
+  _cur_card_in_prt = (size_t)-1;
 }
 
 bool HeapRegionRemSetIterator::has_next(size_t& card_index) {
   switch (_is) {
-  case Sparse:
+  case Sparse: {
     if (_sparse_iter.has_next(card_index)) {
       _n_yielded_sparse++;
       return true;
     }
     // Otherwise, deliberate fall-through
     _is = Fine;
+    PerRegionTable* initial_fine_prt = _hrrs->_other_regions._first_all_fine_prts;
+    if (initial_fine_prt != NULL) {
+      switch_to_prt(_hrrs->_other_regions._first_all_fine_prts);
+    }
+  }
   case Fine:
     if (fine_has_next(card_index)) {
       _n_yielded_fine++;
@@ -1276,6 +1255,11 @@
 #ifndef PRODUCT
 void PerRegionTable::test_fl_mem_size() {
   PerRegionTable* dummy = alloc(NULL);
+
+  size_t min_prt_size = sizeof(void*) + dummy->bm()->size_in_words() * HeapWordSize;
+  assert(dummy->mem_size() > min_prt_size,
+         err_msg("PerRegionTable memory usage is suspiciously small, only has "SIZE_FORMAT" bytes. "
+                 "Should be at least "SIZE_FORMAT" bytes.", dummy->mem_size(), min_prt_size));
   free(dummy);
   guarantee(dummy->mem_size() == fl_mem_size(), "fl_mem_size() does not return the correct element size");
   // try to reset the state
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -185,6 +185,9 @@
   // objects.
   void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
 
+  // Returns whether this remembered set (and all sub-sets) contain no entries.
+  bool is_empty() const;
+
   size_t occupied() const;
   size_t occ_fine() const;
   size_t occ_coarse() const;
@@ -206,9 +209,6 @@
   // Specifically clear the from_card_cache.
   void clear_fcc();
 
-  // "from_hr" is being cleared; remove any entries from it.
-  void clear_incoming_entry(HeapRegion* from_hr);
-
   void do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task);
 
   // Declare the heap size (in # of regions) to the OtherRegionsTable.
@@ -272,6 +272,10 @@
     return _other_regions.hr();
   }
 
+  bool is_empty() const {
+    return (strong_code_roots_list_length() == 0) && _other_regions.is_empty();
+  }
+
   size_t occupied() {
     MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
     return occupied_locked();
@@ -342,20 +346,20 @@
     return _other_regions.mem_size()
       // This correction is necessary because the above includes the second
       // part.
-      + (sizeof(this) - sizeof(OtherRegionsTable))
+      + (sizeof(HeapRegionRemSet) - sizeof(OtherRegionsTable))
       + strong_code_roots_mem_size();
   }
 
   // Returns the memory occupancy of all static data structures associated
   // with remembered sets.
   static size_t static_mem_size() {
-    return OtherRegionsTable::static_mem_size() + G1CodeRootSet::static_mem_size();
+    return OtherRegionsTable::static_mem_size() + G1CodeRootSet::free_chunks_static_mem_size();
   }
 
   // Returns the memory occupancy of all free_list data structures associated
   // with remembered sets.
   static size_t fl_mem_size() {
-    return OtherRegionsTable::fl_mem_size() + G1CodeRootSet::fl_mem_size();
+    return OtherRegionsTable::fl_mem_size() + G1CodeRootSet::free_chunks_mem_size();
   }
 
   bool contains_reference(OopOrNarrowOopStar from) const {
@@ -378,7 +382,7 @@
   void strong_code_roots_do(CodeBlobClosure* blk) const;
 
   // Returns the number of elements in the strong code roots list
-  size_t strong_code_roots_list_length() {
+  size_t strong_code_roots_list_length() const {
     return _code_roots.length();
   }
 
@@ -400,7 +404,6 @@
   // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
   // (Uses it to initialize from_card_cache).
   static void init_heap(uint max_regions) {
-    G1CodeRootSet::initialize();
     OtherRegionsTable::init_from_card_cache(max_regions);
   }
 
@@ -433,26 +436,24 @@
 };
 
 class HeapRegionRemSetIterator : public StackObj {
-
-  // The region RSet over which we're iterating.
+ private:
+  // The region RSet over which we are iterating.
   HeapRegionRemSet* _hrrs;
 
   // Local caching of HRRS fields.
   const BitMap*             _coarse_map;
-  PerRegionTable**          _fine_grain_regions;
 
   G1BlockOffsetSharedArray* _bosa;
   G1CollectedHeap*          _g1h;
 
-  // The number yielded since initialization.
+  // The number of cards yielded since initialization.
   size_t _n_yielded_fine;
   size_t _n_yielded_coarse;
   size_t _n_yielded_sparse;
 
-  // Indicates what granularity of table that we're currently iterating over.
+  // Indicates the granularity of the table that we are currently iterating over.
   // We start iterating over the sparse table, progress to the fine grain
   // table, and then finish with the coarse table.
-  // See HeapRegionRemSetIterator::has_next().
   enum IterState {
     Sparse,
     Fine,
@@ -460,38 +461,30 @@
   };
   IterState _is;
 
-  // In both kinds of iteration, heap offset of first card of current
-  // region.
+  // For both Coarse and Fine remembered set iteration this contains the
+  // first card number of the heap region we currently iterate over.
   size_t _cur_region_card_offset;
-  // Card offset within cur region.
-  size_t _cur_region_cur_card;
 
-  // Coarse table iteration fields:
-
-  // Current region index;
+  // Current region index for the Coarse remembered set iteration.
   int    _coarse_cur_region_index;
   size_t _coarse_cur_region_cur_card;
 
   bool coarse_has_next(size_t& card_index);
 
-  // Fine table iteration fields:
-
-  // Index of bucket-list we're working on.
-  int _fine_array_index;
+  // The PRT we are currently iterating over.
+  PerRegionTable* _fine_cur_prt;
+  // Card offset within the current PRT.
+  size_t _cur_card_in_prt;
 
-  // Per Region Table we're doing within current bucket list.
-  PerRegionTable* _fine_cur_prt;
-
-  /* SparsePRT::*/ SparsePRTIter _sparse_iter;
-
-  void fine_find_next_non_null_prt();
-
+  // Update internal variables when switching to the given PRT.
+  void switch_to_prt(PerRegionTable* prt);
   bool fine_has_next();
   bool fine_has_next(size_t& card_index);
 
-public:
-  // We require an iterator to be initialized before use, so the
-  // constructor does little.
+  // The Sparse remembered set iterator.
+  SparsePRTIter _sparse_iter;
+
+ public:
   HeapRegionRemSetIterator(HeapRegionRemSet* hrrs);
 
   // If there remains one or more cards to be yielded, returns true and
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/heapRegionSet.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -119,7 +119,7 @@
 public:
   const char* name() { return _name; }
 
-  uint length() { return _count.length(); }
+  uint length() const { return _count.length(); }
 
   bool is_empty() { return _count.length() == 0; }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/satbQueue.cpp
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -285,37 +285,6 @@
   _par_closures[i] = par_closure;
 }
 
-void SATBMarkQueueSet::iterate_closure_all_threads() {
-  for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure_and_empty(_closure);
-  }
-  shared_satb_queue()->apply_closure_and_empty(_closure);
-}
-
-void SATBMarkQueueSet::par_iterate_closure_all_threads(uint worker) {
-  SharedHeap* sh = SharedHeap::heap();
-  int parity = sh->strong_roots_parity();
-
-  for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
-    }
-  }
-
-  // We also need to claim the VMThread so that its parity is updated
-  // otherwise the next call to Thread::possibly_parallel_oops_do inside
-  // a StrongRootsScope might skip the VMThread because it has a stale
-  // parity that matches the parity set by the StrongRootsScope
-  //
-  // Whichever worker succeeds in claiming the VMThread gets to do
-  // the shared queue.
-
-  VMThread* vmt = VMThread::vm_thread();
-  if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
-  }
-}
-
 bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
                                                               uint worker) {
   BufferNode* nd = NULL;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/satbQueue.hpp
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,7 +33,9 @@
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class Threads;
   friend class SATBMarkQueueSet;
+  friend class G1RemarkThreadsClosure;
 
 private:
   // Filter out unwanted entries from the buffer.
@@ -119,13 +121,6 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // Apply the registered closure to all entries on each
-  // currently-active buffer and then empty the buffer. It should only
-  // be called serially and at a safepoint.
-  void iterate_closure_all_threads();
-  // Parallel version of the above.
-  void par_iterate_closure_all_threads(uint worker);
-
   // If there exists some completed buffer, pop it, then apply the
   // registered closure to all its elements, and return true.  If no
   // completed buffers exist, return false.
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/sparsePRT.cpp
--- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -370,7 +370,7 @@
 }
 
 size_t RSHashTable::mem_size() const {
-  return sizeof(this) +
+  return sizeof(RSHashTable) +
     capacity() * (SparsePRTEntry::size() + sizeof(int));
 }
 
@@ -472,7 +472,7 @@
 size_t SparsePRT::mem_size() const {
   // We ignore "_cur" here, because it either = _next, or else it is
   // on the deleted list.
-  return sizeof(this) + _next->mem_size();
+  return sizeof(SparsePRT) + _next->mem_size();
 }
 
 bool SparsePRT::add_card(RegionIdx_t region_id, CardIdx_t card_index) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/vmStructs_g1.hpp
--- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -34,6 +34,8 @@
   static_field(HeapRegion, GrainBytes,        size_t)                         \
   static_field(HeapRegion, LogOfHRGrainBytes, int)                            \
                                                                               \
+  nonstatic_field(G1OffsetTableContigSpace, _top,       HeapWord*)            \
+                                                                              \
   nonstatic_field(G1HeapRegionTable, _base,             address)              \
   nonstatic_field(G1HeapRegionTable, _length,           size_t)               \
   nonstatic_field(G1HeapRegionTable, _biased_base,      address)              \
@@ -69,7 +71,8 @@
                                                                               \
   declare_type(G1CollectedHeap, SharedHeap)                                   \
                                                                               \
-  declare_type(HeapRegion, ContiguousSpace)                                   \
+  declare_type(G1OffsetTableContigSpace, CompactibleSpace)                    \
+  declare_type(HeapRegion, G1OffsetTableContigSpace)                          \
   declare_toplevel_type(HeapRegionSeq)                                        \
   declare_toplevel_type(HeapRegionSetBase)                                    \
   declare_toplevel_type(HeapRegionSetCount)                                   \
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/g1/vm_operations_g1.cpp
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -226,7 +226,7 @@
 void VM_CGC_Operation::doit() {
   gclog_or_tty->date_stamp(G1Log::fine() && PrintGCDateStamps);
   TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
-  GCTraceTime t(_printGCMessage, G1Log::fine(), true, G1CollectedHeap::heap()->gc_timer_cm());
+  GCTraceTime t(_printGCMessage, G1Log::fine(), true, G1CollectedHeap::heap()->gc_timer_cm(), G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id());
   SharedHeap* sh = SharedHeap::heap();
   // This could go away if CollectedHeap gave access to _gc_is_active...
   if (sh != NULL) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,6 +32,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/virtualspace.hpp"
 #include "runtime/vmThread.hpp"
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,12 +28,12 @@
 #include "gc_implementation/parNew/parOopClosures.inline.hpp"
 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
-#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
-#include "gc_implementation/shared/copyFailedInfo.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.inline.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/defNewGeneration.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -50,7 +50,7 @@
 #include "runtime/handles.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/workgroup.hpp"
@@ -251,7 +251,7 @@
         plab->set_word_size(buf_size);
         plab->set_buf(buf_space);
         record_survivor_plab(buf_space, buf_size);
-        obj = plab->allocate(word_sz);
+        obj = plab->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
         // Note that we cannot compare buf_size < word_sz below
         // because of AlignmentReserve (see ParGCAllocBuffer::allocate()).
         assert(obj != NULL || plab->words_remaining() < word_sz,
@@ -613,20 +613,21 @@
 
   KlassScanClosure klass_scan_closure(&par_scan_state.to_space_root_closure(),
                                       gch->rem_set()->klass_rem_set());
-
-  int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
+                                           &par_scan_state.to_space_root_closure(),
+                                           false);
 
   par_scan_state.start_strong_roots();
-  gch->gen_process_strong_roots(_gen->level(),
-                                true,  // Process younger gens, if any,
-                                       // as strong roots.
-                                false, // no scope; this is parallel code
-                                true,  // is scavenging
-                                SharedHeap::ScanningOption(so),
-                                &par_scan_state.to_space_root_closure(),
-                                true,   // walk *all* scavengable nmethods
-                                &par_scan_state.older_gen_closure(),
-                                &klass_scan_closure);
+  gch->gen_process_roots(_gen->level(),
+                         true,  // Process younger gens, if any,
+                                // as strong roots.
+                         false, // no scope; this is parallel code
+                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &par_scan_state.to_space_root_closure(),
+                         &par_scan_state.older_gen_closure(),
+                         &cld_scan_closure);
+
   par_scan_state.end_strong_roots();
 
   // "evacuate followers".
@@ -957,7 +958,7 @@
     size_policy->minor_collection_begin();
   }
 
-  GCTraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL);
+  GCTraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL, gc_tracer.gc_id());
   // Capture heap used before collection (for printing).
   size_t gch_prev_used = gch->used();
 
@@ -1015,14 +1016,14 @@
     ParNewRefProcTaskExecutor task_executor(*this, thread_state_set);
     stats = rp->process_discovered_references(&is_alive, &keep_alive,
                                               &evacuate_followers, &task_executor,
-                                              _gc_timer);
+                                              _gc_timer, gc_tracer.gc_id());
   } else {
     thread_state_set.flush();
     gch->set_par_threads(0);  // 0 ==> non-parallel.
     gch->save_marks();
     stats = rp->process_discovered_references(&is_alive, &keep_alive,
                                               &evacuate_followers, NULL,
-                                              _gc_timer);
+                                              _gc_timer, gc_tracer.gc_id());
   }
   gc_tracer.report_gc_reference_stats(stats);
   if (!promotion_failed()) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -69,7 +69,7 @@
   ParScanWithoutBarrierClosure         _to_space_closure; // scan_without_gc_barrier
   ParScanWithBarrierClosure            _old_gen_closure; // scan_with_gc_barrier
   ParRootScanWithoutBarrierClosure     _to_space_root_closure; // scan_root_without_gc_barrier
-  // One of these two will be passed to process_strong_roots, which will
+  // One of these two will be passed to process_roots, which will
   // set its generation.  The first is for two-gen configs where the
   // old gen collects the perm gen; the second is for arbitrary configs.
   // The second isn't used right now (it used to be used for the train, an
@@ -168,7 +168,7 @@
   HeapWord* alloc_in_to_space_slow(size_t word_sz);
 
   HeapWord* alloc_in_to_space(size_t word_sz) {
-    HeapWord* obj = to_space_alloc_buffer()->allocate(word_sz);
+    HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
     if (obj != NULL) return obj;
     else return alloc_in_to_space_slow(word_sz);
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -30,6 +30,7 @@
 #include "gc_implementation/parallelScavenge/psYoungGen.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/oop.psgc.inline.hpp"
+#include "runtime/prefetch.inline.hpp"
 
 // Checks an individual oop for missing precise marks. Mark
 // may be either dirty or newgen.
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -30,6 +30,7 @@
 #include "memory/allocation.inline.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -71,7 +71,7 @@
   if (_virtual_space != NULL && _virtual_space->expand_by(_reserved_byte_size)) {
     _region_start = covered_region.start();
     _region_size = covered_region.word_size();
-    idx_t* map = (idx_t*)_virtual_space->reserved_low_addr();
+    BitMap::bm_word_t* map = (BitMap::bm_word_t*)_virtual_space->reserved_low_addr();
     _beg_bits.set_map(map);
     _beg_bits.set_size(bits / 2);
     _end_bits.set_map(map + words / 2);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -53,13 +53,13 @@
   ResourceMark rm;
 
   NOT_PRODUCT(GCTraceTime tm("ThreadRootsMarkingTask",
-    PrintGCDetails && TraceParallelOldGCTasks, true, NULL));
+    PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
 
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
   CLDToOopClosure mark_and_push_from_clds(&mark_and_push_closure, true);
-  CodeBlobToOopClosure mark_and_push_in_blobs(&mark_and_push_closure, /*do_marking=*/ true);
+  MarkingCodeBlobClosure mark_and_push_in_blobs(&mark_and_push_closure, !CodeBlobToOopClosure::FixRelocations);
 
   if (_java_thread != NULL)
     _java_thread->oops_do(
@@ -82,7 +82,7 @@
   assert(Universe::heap()->is_gc_active(), "called outside gc");
 
   NOT_PRODUCT(GCTraceTime tm("MarkFromRootsTask",
-    PrintGCDetails && TraceParallelOldGCTasks, true, NULL));
+    PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
@@ -100,7 +100,7 @@
     case threads:
     {
       ResourceMark rm;
-      CodeBlobToOopClosure each_active_code_blob(&mark_and_push_closure, /*do_marking=*/ true);
+      MarkingCodeBlobClosure each_active_code_blob(&mark_and_push_closure, !CodeBlobToOopClosure::FixRelocations);
       CLDToOopClosure mark_and_push_from_cld(&mark_and_push_closure);
       Threads::oops_do(&mark_and_push_closure, &mark_and_push_from_cld, &each_active_code_blob);
     }
@@ -153,7 +153,7 @@
   assert(Universe::heap()->is_gc_active(), "called outside gc");
 
   NOT_PRODUCT(GCTraceTime tm("RefProcTask",
-    PrintGCDetails && TraceParallelOldGCTasks, true, NULL));
+    PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
@@ -209,7 +209,7 @@
   assert(Universe::heap()->is_gc_active(), "called outside gc");
 
   NOT_PRODUCT(GCTraceTime tm("StealMarkingTask",
-    PrintGCDetails && TraceParallelOldGCTasks, true, NULL));
+    PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
 
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
@@ -242,7 +242,7 @@
   assert(Universe::heap()->is_gc_active(), "called outside gc");
 
   NOT_PRODUCT(GCTraceTime tm("StealRegionCompactionTask",
-    PrintGCDetails && TraceParallelOldGCTasks, true, NULL));
+    PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
 
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
@@ -309,7 +309,7 @@
 void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
 
   NOT_PRODUCT(GCTraceTime tm("UpdateDensePrefixTask",
-    PrintGCDetails && TraceParallelOldGCTasks, true, NULL));
+    PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
 
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
@@ -324,7 +324,7 @@
   assert(Universe::heap()->is_gc_active(), "called outside gc");
 
   NOT_PRODUCT(GCTraceTime tm("DrainStacksCompactionTask",
-    PrintGCDetails && TraceParallelOldGCTasks, true, NULL));
+    PrintGCDetails && TraceParallelOldGCTasks, true, NULL, PSParallelCompact::gc_tracer()->gc_id()));
 
   ParCompactionManager* cm =
     ParCompactionManager::gc_thread_compaction_manager(which);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -169,7 +169,7 @@
 
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL);
+    GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer->gc_id());
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
 
@@ -513,7 +513,7 @@
 
 void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  GCTraceTime tm("phase 1", PrintGCDetails && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 1", PrintGCDetails && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace(" 1");
 
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
@@ -528,14 +528,14 @@
     Universe::oops_do(mark_and_push_closure());
     JNIHandles::oops_do(mark_and_push_closure());   // Global (strong) JNI handles
     CLDToOopClosure mark_and_push_from_cld(mark_and_push_closure());
-    CodeBlobToOopClosure each_active_code_blob(mark_and_push_closure(), /*do_marking=*/ true);
+    MarkingCodeBlobClosure each_active_code_blob(mark_and_push_closure(), !CodeBlobToOopClosure::FixRelocations);
     Threads::oops_do(mark_and_push_closure(), &mark_and_push_from_cld, &each_active_code_blob);
     ObjectSynchronizer::oops_do(mark_and_push_closure());
     FlatProfiler::oops_do(mark_and_push_closure());
     Management::oops_do(mark_and_push_closure());
     JvmtiExport::oops_do(mark_and_push_closure());
     SystemDictionary::always_strong_oops_do(mark_and_push_closure());
-    ClassLoaderDataGraph::always_strong_oops_do(mark_and_push_closure(), follow_klass_closure(), true);
+    ClassLoaderDataGraph::always_strong_cld_do(follow_cld_closure());
     // Do not treat nmethods as strong roots for mark/sweep, since we can unload them.
     //CodeCache::scavenge_root_nmethods_do(CodeBlobToOopClosure(mark_and_push_closure()));
   }
@@ -548,7 +548,7 @@
     ref_processor()->setup_policy(clear_all_softrefs);
     const ReferenceProcessorStats& stats =
       ref_processor()->process_discovered_references(
-        is_alive_closure(), mark_and_push_closure(), follow_stack_closure(), NULL, _gc_timer);
+        is_alive_closure(), mark_and_push_closure(), follow_stack_closure(), NULL, _gc_timer, _gc_tracer->gc_id());
     gc_tracer()->report_gc_reference_stats(stats);
   }
 
@@ -574,7 +574,7 @@
 
 
 void PSMarkSweep::mark_sweep_phase2() {
-  GCTraceTime tm("phase 2", PrintGCDetails && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 2", PrintGCDetails && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace("2");
 
   // Now all live objects are marked, compute the new object addresses.
@@ -604,7 +604,7 @@
 
 void PSMarkSweep::mark_sweep_phase3() {
   // Adjust the pointers to reflect the new locations
-  GCTraceTime tm("phase 3", PrintGCDetails && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 3", PrintGCDetails && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace("3");
 
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
@@ -625,16 +625,16 @@
   FlatProfiler::oops_do(adjust_pointer_closure());
   Management::oops_do(adjust_pointer_closure());
   JvmtiExport::oops_do(adjust_pointer_closure());
-  // SO_AllClasses
   SystemDictionary::oops_do(adjust_pointer_closure());
-  ClassLoaderDataGraph::oops_do(adjust_pointer_closure(), adjust_klass_closure(), true);
+  ClassLoaderDataGraph::cld_do(adjust_cld_closure());
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, adjust_pointer_closure());
 
-  CodeCache::oops_do(adjust_pointer_closure());
+  CodeBlobToOopClosure adjust_from_blobs(adjust_pointer_closure(), CodeBlobToOopClosure::FixRelocations);
+  CodeCache::blobs_do(&adjust_from_blobs);
   StringTable::oops_do(adjust_pointer_closure());
   ref_processor()->weak_oops_do(adjust_pointer_closure());
   PSScavenge::reference_processor()->weak_oops_do(adjust_pointer_closure());
@@ -647,7 +647,7 @@
 
 void PSMarkSweep::mark_sweep_phase4() {
   EventMark m("4 compact heap");
-  GCTraceTime tm("phase 4", PrintGCDetails && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 4", PrintGCDetails && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace("4");
 
   // All pointers are now adjusted, move objects accordingly
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -40,11 +40,11 @@
   static CollectorCounters*  _counters;
 
   // Closure accessors
-  static OopClosure* mark_and_push_closure() { return &MarkSweep::mark_and_push_closure; }
-  static KlassClosure* follow_klass_closure() { return &MarkSweep::follow_klass_closure; }
-  static VoidClosure* follow_stack_closure() { return (VoidClosure*)&MarkSweep::follow_stack_closure; }
-  static OopClosure* adjust_pointer_closure() { return (OopClosure*)&MarkSweep::adjust_pointer_closure; }
-  static KlassClosure* adjust_klass_closure() { return &MarkSweep::adjust_klass_closure; }
+  static OopClosure* mark_and_push_closure()   { return &MarkSweep::mark_and_push_closure; }
+  static VoidClosure* follow_stack_closure()   { return (VoidClosure*)&MarkSweep::follow_stack_closure; }
+  static CLDClosure* follow_cld_closure()      { return &MarkSweep::follow_cld_closure; }
+  static OopClosure* adjust_pointer_closure()  { return (OopClosure*)&MarkSweep::adjust_pointer_closure; }
+  static CLDClosure* adjust_cld_closure()      { return &MarkSweep::adjust_cld_closure; }
   static BoolObjectClosure* is_alive_closure() { return (BoolObjectClosure*)&MarkSweep::is_alive; }
 
  debug_only(public:)  // Used for PSParallelCompact debugging
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,6 +32,7 @@
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/prefetch.inline.hpp"
 
 PSMarkSweepDecorator* PSMarkSweepDecorator::_destination_decorator = NULL;
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -978,7 +978,7 @@
   // at each young gen gc.  Do the update unconditionally (even though a
   // promotion failure does not swap spaces) because an unknown number of minor
   // collections will have swapped the spaces an unknown number of times.
-  GCTraceTime tm("pre compact", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("pre compact", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
   ParallelScavengeHeap* heap = gc_heap();
   _space_info[from_space_id].set_space(heap->young_gen()->from_space());
   _space_info[to_space_id].set_space(heap->young_gen()->to_space());
@@ -1021,7 +1021,7 @@
 
 void PSParallelCompact::post_compact()
 {
-  GCTraceTime tm("post compact", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("post compact", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   for (unsigned int id = old_space_id; id < last_space_id; ++id) {
     // Clear the marking bitmap, summary data and split info.
@@ -1847,7 +1847,7 @@
 void PSParallelCompact::summary_phase(ParCompactionManager* cm,
                                       bool maximum_compaction)
 {
-  GCTraceTime tm("summary phase", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("summary phase", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
   // trace("2");
 
 #ifdef  ASSERT
@@ -2056,7 +2056,7 @@
 
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL);
+    GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer.gc_id());
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
 
@@ -2351,7 +2351,7 @@
                                       bool maximum_heap_compaction,
                                       ParallelOldTracer *gc_tracer) {
   // Recursively traverse all live objects and mark them
-  GCTraceTime tm("marking phase", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("marking phase", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   ParallelScavengeHeap* heap = gc_heap();
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
@@ -2366,7 +2366,7 @@
   ClassLoaderDataGraph::clear_claimed_marks();
 
   {
-    GCTraceTime tm_m("par mark", print_phases(), true, &_gc_timer);
+    GCTraceTime tm_m("par mark", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
     ParallelScavengeHeap::ParStrongRootsScope psrs;
 
@@ -2395,24 +2395,24 @@
 
   // Process reference objects found during marking
   {
-    GCTraceTime tm_r("reference processing", print_phases(), true, &_gc_timer);
+    GCTraceTime tm_r("reference processing", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
     ReferenceProcessorStats stats;
     if (ref_processor()->processing_is_mt()) {
       RefProcTaskExecutor task_executor;
       stats = ref_processor()->process_discovered_references(
         is_alive_closure(), &mark_and_push_closure, &follow_stack_closure,
-        &task_executor, &_gc_timer);
+        &task_executor, &_gc_timer, _gc_tracer.gc_id());
     } else {
       stats = ref_processor()->process_discovered_references(
         is_alive_closure(), &mark_and_push_closure, &follow_stack_closure, NULL,
-        &_gc_timer);
+        &_gc_timer, _gc_tracer.gc_id());
     }
 
     gc_tracer->report_gc_reference_stats(stats);
   }
 
-  GCTraceTime tm_c("class unloading", print_phases(), true, &_gc_timer);
+  GCTraceTime tm_c("class unloading", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   // This is the point where the entire marking should have completed.
   assert(cm->marking_stacks_empty(), "Marking should have completed");
@@ -2451,7 +2451,7 @@
 
 void PSParallelCompact::adjust_roots() {
   // Adjust the pointers to reflect the new locations
-  GCTraceTime tm("adjust roots", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("adjust roots", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   // Need new claim bits when tracing through and adjusting pointers.
   ClassLoaderDataGraph::clear_claimed_marks();
@@ -2465,7 +2465,6 @@
   FlatProfiler::oops_do(adjust_pointer_closure());
   Management::oops_do(adjust_pointer_closure());
   JvmtiExport::oops_do(adjust_pointer_closure());
-  // SO_AllClasses
   SystemDictionary::oops_do(adjust_pointer_closure());
   ClassLoaderDataGraph::oops_do(adjust_pointer_closure(), adjust_klass_closure(), true);
 
@@ -2474,7 +2473,8 @@
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, adjust_pointer_closure());
 
-  CodeCache::oops_do(adjust_pointer_closure());
+  CodeBlobToOopClosure adjust_from_blobs(adjust_pointer_closure(), CodeBlobToOopClosure::FixRelocations);
+  CodeCache::blobs_do(&adjust_from_blobs);
   StringTable::oops_do(adjust_pointer_closure());
   ref_processor()->weak_oops_do(adjust_pointer_closure());
   // Roots were visited so references into the young gen in roots
@@ -2487,7 +2487,7 @@
 void PSParallelCompact::enqueue_region_draining_tasks(GCTaskQueue* q,
                                                       uint parallel_gc_threads)
 {
-  GCTraceTime tm("drain task setup", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("drain task setup", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   // Find the threads that are active
   unsigned int which = 0;
@@ -2561,7 +2561,7 @@
 
 void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
                                                     uint parallel_gc_threads) {
-  GCTraceTime tm("dense prefix task setup", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("dense prefix task setup", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   ParallelCompactData& sd = PSParallelCompact::summary_data();
 
@@ -2643,7 +2643,7 @@
                                      GCTaskQueue* q,
                                      ParallelTaskTerminator* terminator_ptr,
                                      uint parallel_gc_threads) {
-  GCTraceTime tm("steal task setup", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("steal task setup", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   // Once a thread has drained it's stack, it should try to steal regions from
   // other threads.
@@ -2691,7 +2691,7 @@
 
 void PSParallelCompact::compact() {
   // trace("5");
-  GCTraceTime tm("compaction phase", print_phases(), true, &_gc_timer);
+  GCTraceTime tm("compaction phase", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
   assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
@@ -2708,7 +2708,7 @@
   enqueue_region_stealing_tasks(q, &terminator, active_gc_threads);
 
   {
-    GCTraceTime tm_pc("par compact", print_phases(), true, &_gc_timer);
+    GCTraceTime tm_pc("par compact", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
 
     gc_task_manager()->execute_and_wait(q);
 
@@ -2722,7 +2722,7 @@
 
   {
     // Update the deferred objects, if any.  Any compaction manager can be used.
-    GCTraceTime tm_du("deferred updates", print_phases(), true, &_gc_timer);
+    GCTraceTime tm_du("deferred updates", print_phases(), true, &_gc_timer, _gc_tracer.gc_id());
     ParCompactionManager* cm = ParCompactionManager::manager_array(0);
     for (unsigned int id = old_space_id; id < last_space_id; ++id) {
       update_deferred_objects(cm, SpaceId(id));
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1004,6 +1004,10 @@
   static bool   _dwl_initialized;
 #endif  // #ifdef ASSERT
 
+
+ public:
+  static ParallelOldTracer* gc_tracer() { return &_gc_tracer; }
+
  private:
 
   static void initialize_space_info();
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_HPP
 
 #include "gc_implementation/parallelScavenge/objectStartArray.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/allocation.hpp"
 
 //
@@ -94,23 +95,9 @@
   PSYoungPromotionLAB() { }
 
   // Not MT safe
-  HeapWord* allocate(size_t size) {
-    // Can't assert this, when young fills, we keep the LAB around, but flushed.
-    // assert(_state != flushed, "Sanity");
-    HeapWord* obj = top();
-    HeapWord* new_top = obj + size;
-    // The 'new_top>obj' check is needed to detect overflow of obj+size.
-    if (new_top > obj && new_top <= end()) {
-      set_top(new_top);
-      assert(is_object_aligned((intptr_t)obj) && is_object_aligned((intptr_t)new_top),
-             "checking alignment");
-      return obj;
-    }
+  inline HeapWord* allocate(size_t size);
 
-    return NULL;
-  }
-
-  debug_only(virtual bool lab_is_valid(MemRegion lab));
+  debug_only(virtual bool lab_is_valid(MemRegion lab);)
 };
 
 class PSOldPromotionLAB : public PSPromotionLAB {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+
+#include "gc_implementation/parallelScavenge/psPromotionLAB.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* PSYoungPromotionLAB::allocate(size_t size) {  // Bump-allocates 'size' HeapWords from this LAB; returns the chunk start, or NULL on failure.
+  // Can't assert this, when young fills, we keep the LAB around, but flushed.
+  // assert(_state != flushed, "Sanity");
+  HeapWord* obj = CollectedHeap::align_allocation_or_fail(top(), end(), SurvivorAlignmentInBytes);
+  if (obj == NULL) {  // NOTE(review): presumably NULL means no SurvivorAlignmentInBytes-aligned start fits before end() - confirm in CollectedHeap.
+    return NULL;
+  }
+
+  HeapWord* new_top = obj + size;  // HeapWord* arithmetic, so 'size' is a count of HeapWords, not bytes.
+  // The 'new_top>obj' check is needed to detect overflow of obj+size.
+  if (new_top > obj && new_top <= end()) {
+    set_top(new_top);  // Success: top now points just past the newly carved chunk.
+    assert(is_ptr_aligned(obj, SurvivorAlignmentInBytes) && is_object_aligned((intptr_t)new_top),
+           "checking alignment");
+    return obj;
+  } else {
+    set_top(obj);  // Failure still moves top to the aligned address. NOTE(review): presumably the alignment gap was already consumed by the helper - confirm.
+    return NULL;
+  }
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -27,6 +27,7 @@
 
 #include "gc_implementation/parallelScavenge/psOldGen.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
+#include "gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
 #include "oops/oop.psgc.inline.hpp"
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -331,7 +331,7 @@
 
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    GCTraceTime t1(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, NULL);
+    GCTraceTime t1(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer.gc_id());
     TraceCollectorStats tcs(counters());
     TraceMemoryManagerStats tms(false /* not full GC */,gc_cause);
 
@@ -397,7 +397,7 @@
     // We'll use the promotion manager again later.
     PSPromotionManager* promotion_manager = PSPromotionManager::vm_thread_promotion_manager();
     {
-      GCTraceTime tm("Scavenge", false, false, &_gc_timer);
+      GCTraceTime tm("Scavenge", false, false, &_gc_timer, _gc_tracer.gc_id());
       ParallelScavengeHeap::ParStrongRootsScope psrs;
 
       GCTaskQueue* q = GCTaskQueue::create();
@@ -439,7 +439,7 @@
 
     // Process reference objects discovered during scavenge
     {
-      GCTraceTime tm("References", false, false, &_gc_timer);
+      GCTraceTime tm("References", false, false, &_gc_timer, _gc_tracer.gc_id());
 
       reference_processor()->setup_policy(false); // not always_clear
       reference_processor()->set_active_mt_degree(active_workers);
@@ -450,10 +450,10 @@
         PSRefProcTaskExecutor task_executor;
         stats = reference_processor()->process_discovered_references(
           &_is_alive_closure, &keep_alive, &evac_followers, &task_executor,
-          &_gc_timer);
+          &_gc_timer, _gc_tracer.gc_id());
       } else {
         stats = reference_processor()->process_discovered_references(
-          &_is_alive_closure, &keep_alive, &evac_followers, NULL, &_gc_timer);
+          &_is_alive_closure, &keep_alive, &evac_followers, NULL, &_gc_timer, _gc_tracer.gc_id());
       }
 
       _gc_tracer.report_gc_reference_stats(stats);
@@ -468,7 +468,7 @@
     }
 
     {
-      GCTraceTime tm("StringTable", false, false, &_gc_timer);
+      GCTraceTime tm("StringTable", false, false, &_gc_timer, _gc_tracer.gc_id());
       // Unlink any dead interned Strings and process the remaining live ones.
       PSScavengeRootsClosure root_closure(promotion_manager);
       StringTable::unlink_or_oops_do(&_is_alive_closure, &root_closure);
@@ -638,7 +638,7 @@
     NOT_PRODUCT(reference_processor()->verify_no_references_recorded());
 
     {
-      GCTraceTime tm("Prune Scavenge Root Methods", false, false, &_gc_timer);
+      GCTraceTime tm("Prune Scavenge Root Methods", false, false, &_gc_timer, _gc_tracer.gc_id());
 
       CodeCache::prune_scavenge_root_nmethods();
     }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -65,7 +65,7 @@
     case threads:
     {
       ResourceMark rm;
-      CLDToOopClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
+      CLDClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
       Threads::oops_do(&roots_closure, cld_closure, NULL);
     }
     break;
@@ -100,7 +100,7 @@
 
     case code_cache:
       {
-        CodeBlobToOopClosure each_scavengable_code_blob(&roots_to_old_closure, /*do_marking=*/ true);
+        MarkingCodeBlobClosure each_scavengable_code_blob(&roots_to_old_closure, CodeBlobToOopClosure::FixRelocations);
         CodeCache::scavenge_root_nmethods_do(&each_scavengable_code_blob);
       }
       break;
@@ -122,8 +122,8 @@
 
   PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
   PSScavengeRootsClosure roots_closure(pm);
-  CLDToOopClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
-  CodeBlobToOopClosure roots_in_blobs(&roots_closure, /*do_marking=*/ true);
+  CLDClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
+  MarkingCodeBlobClosure roots_in_blobs(&roots_closure, CodeBlobToOopClosure::FixRelocations);
 
   if (_java_thread != NULL)
     _java_thread->oops_do(&roots_closure, roots_from_clds, &roots_in_blobs);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/concurrentGCThread.cpp
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -36,21 +36,10 @@
 
 int  ConcurrentGCThread::_CGC_flag            = CGC_nil;
 
-SuspendibleThreadSet ConcurrentGCThread::_sts;
-
 ConcurrentGCThread::ConcurrentGCThread() :
   _should_terminate(false), _has_terminated(false) {
-  _sts.initialize();
 };
 
-void ConcurrentGCThread::safepoint_synchronize() {
-  _sts.suspend_all();
-}
-
-void ConcurrentGCThread::safepoint_desynchronize() {
-  _sts.resume_all();
-}
-
 void ConcurrentGCThread::create_and_start() {
   if (os::create_thread(this, os::cgc_thread)) {
     // XXX: need to set this to low priority
@@ -91,78 +80,6 @@
   ThreadLocalStorage::set_thread(NULL);
 }
 
-
-void SuspendibleThreadSet::initialize_work() {
-  MutexLocker x(STS_init_lock);
-  if (!_initialized) {
-    _m             = new Monitor(Mutex::leaf,
-                                 "SuspendibleThreadSetLock", true);
-    _async         = 0;
-    _async_stop    = false;
-    _async_stopped = 0;
-    _initialized   = true;
-  }
-}
-
-void SuspendibleThreadSet::join() {
-  initialize();
-  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
-  while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag);
-  _async++;
-  assert(_async > 0, "Huh.");
-}
-
-void SuspendibleThreadSet::leave() {
-  assert(_initialized, "Must be initialized.");
-  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
-  _async--;
-  assert(_async >= 0, "Huh.");
-  if (_async_stop) _m->notify_all();
-}
-
-void SuspendibleThreadSet::yield(const char* id) {
-  assert(_initialized, "Must be initialized.");
-  if (_async_stop) {
-    MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
-    if (_async_stop) {
-      _async_stopped++;
-      assert(_async_stopped > 0, "Huh.");
-      if (_async_stopped == _async) {
-        if (ConcGCYieldTimeout > 0) {
-          double now = os::elapsedTime();
-          guarantee((now - _suspend_all_start) * 1000.0 <
-                    (double)ConcGCYieldTimeout,
-                    "Long delay; whodunit?");
-        }
-      }
-      _m->notify_all();
-      while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag);
-      _async_stopped--;
-      assert(_async >= 0, "Huh");
-      _m->notify_all();
-    }
-  }
-}
-
-void SuspendibleThreadSet::suspend_all() {
-  initialize();  // If necessary.
-  if (ConcGCYieldTimeout > 0) {
-    _suspend_all_start = os::elapsedTime();
-  }
-  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
-  assert(!_async_stop, "Only one at a time.");
-  _async_stop = true;
-  while (_async_stopped < _async) _m->wait(Mutex::_no_safepoint_check_flag);
-}
-
-void SuspendibleThreadSet::resume_all() {
-  assert(_initialized, "Must be initialized.");
-  MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
-  assert(_async_stopped == _async, "Huh.");
-  _async_stop = false;
-  _m->notify_all();
-}
-
 static void _sltLoop(JavaThread* thread, TRAPS) {
   SurrogateLockerThread* slt = (SurrogateLockerThread*)thread;
   slt->loop();
@@ -282,30 +199,3 @@
   }
   assert(!_monitor.owned_by_self(), "Should unlock before exit.");
 }
-
-
-// ===== STS Access From Outside CGCT =====
-
-void ConcurrentGCThread::stsYield(const char* id) {
-  assert( Thread::current()->is_ConcurrentGC_thread(),
-          "only a conc GC thread can call this" );
-  _sts.yield(id);
-}
-
-bool ConcurrentGCThread::stsShouldYield() {
-  assert( Thread::current()->is_ConcurrentGC_thread(),
-          "only a conc GC thread can call this" );
-  return _sts.should_yield();
-}
-
-void ConcurrentGCThread::stsJoin() {
-  assert( Thread::current()->is_ConcurrentGC_thread(),
-          "only a conc GC thread can call this" );
-  _sts.join();
-}
-
-void ConcurrentGCThread::stsLeave() {
-  assert( Thread::current()->is_ConcurrentGC_thread(),
-          "only a conc GC thread can call this" );
-  _sts.leave();
-}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/concurrentGCThread.hpp
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,55 +26,8 @@
 #define SHARE_VM_GC_IMPLEMENTATION_SHARED_CONCURRENTGCTHREAD_HPP
 
 #include "utilities/macros.hpp"
-#if INCLUDE_ALL_GCS
+#include "gc_implementation/shared/suspendibleThreadSet.hpp"
 #include "runtime/thread.hpp"
-#endif // INCLUDE_ALL_GCS
-
-class VoidClosure;
-
-// A SuspendibleThreadSet is (obviously) a set of threads that can be
-// suspended.  A thread can join and later leave the set, and periodically
-// yield.  If some thread (not in the set) requests, via suspend_all, that
-// the threads be suspended, then the requesting thread is blocked until
-// all the threads in the set have yielded or left the set.  (Threads may
-// not enter the set when an attempted suspension is in progress.)  The
-// suspending thread later calls resume_all, allowing the suspended threads
-// to continue.
-
-class SuspendibleThreadSet {
-  Monitor* _m;
-  int      _async;
-  bool     _async_stop;
-  int      _async_stopped;
-  bool     _initialized;
-  double   _suspend_all_start;
-
-  void initialize_work();
-
- public:
-  SuspendibleThreadSet() : _initialized(false) {}
-
-  // Add the current thread to the set.  May block if a suspension
-  // is in progress.
-  void join();
-  // Removes the current thread from the set.
-  void leave();
-  // Returns "true" iff an suspension is in progress.
-  bool should_yield() { return _async_stop; }
-  // Suspends the current thread if a suspension is in progress (for
-  // the duration of the suspension.)
-  void yield(const char* id);
-  // Return when all threads in the set are suspended.
-  void suspend_all();
-  // Allow suspended threads to resume.
-  void resume_all();
-  // Redundant initializations okay.
-  void initialize() {
-    // Double-check dirty read idiom.
-    if (!_initialized) initialize_work();
-  }
-};
-
 
 class ConcurrentGCThread: public NamedThread {
   friend class VMStructs;
@@ -96,9 +49,6 @@
   static int set_CGC_flag(int b)           { return _CGC_flag |= b; }
   static int reset_CGC_flag(int b)         { return _CGC_flag &= ~b; }
 
-  // All instances share this one set.
-  static SuspendibleThreadSet _sts;
-
   // Create and start the thread (setting it's priority high.)
   void create_and_start();
 
@@ -121,25 +71,6 @@
 
   // Tester
   bool is_ConcurrentGC_thread() const          { return true;       }
-
-  static void safepoint_synchronize();
-  static void safepoint_desynchronize();
-
-  // All overridings should probably do _sts::yield, but we allow
-  // overriding for distinguished debugging messages.  Default is to do
-  // nothing.
-  virtual void yield() {}
-
-  bool should_yield() { return _sts.should_yield(); }
-
-  // they are prefixed by sts since there are already yield() and
-  // should_yield() (non-static) methods in this class and it was an
-  // easy way to differentiate them.
-  static void stsYield(const char* id);
-  static bool stsShouldYield();
-  static void stsJoin();
-  static void stsLeave();
-
 };
 
 // The SurrogateLockerThread is used by concurrent GC threads for
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/gcId.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/gcId.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/gcId.hpp"
+#include "runtime/safepoint.hpp"
+
+uint GCId::_next_id = 0;  // Value the next create() call will wrap; the sequence starts at 0.
+
+const GCId GCId::create() {
+  return GCId(_next_id++);  // Wrap the current counter value, then advance the counter (plain, non-atomic increment).
+}
+const GCId GCId::peek() {
+  return GCId(_next_id);  // Same value the next create() would return; the counter is left unchanged.
+}
+const GCId GCId::undefined() {
+  return GCId(UNDEFINED);  // Sentinel instance wrapping the UNDEFINED constant.
+}
+bool GCId::is_undefined() const {
+  return _id == UNDEFINED;  // True only when this instance wraps the sentinel value.
+}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/gcId.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/gcId.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_GCID_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_GCID_HPP
+
+#include "memory/allocation.hpp"
+
+class GCId VALUE_OBJ_CLASS_SPEC {  // Wrapper around a uint GC id; constructors are private, so values come from create()/peek()/undefined().
+ private:
+  uint _id;  // The wrapped id; equals UNDEFINED for the sentinel instance.
+  GCId(uint id) : _id(id) {}
+  GCId() { } // Unused; note that it leaves _id uninitialized.
+
+  static uint _next_id;  // Counter read by peek() and post-incremented by create().
+  static const uint UNDEFINED = (uint)-1;  // Largest uint value, reserved as the "no id" marker.
+
+ public:
+  uint id() const {  // Returns the raw id after asserting that it is not the UNDEFINED sentinel.
+    assert(_id != UNDEFINED, "Using undefined GC ID");
+    return _id;
+  }
+  bool is_undefined() const;  // True iff _id == UNDEFINED.
+
+  static const GCId create();  // Returns GCId(_next_id), then increments _next_id.
+  static const GCId peek();  // Returns GCId(_next_id) without changing _next_id.
+  static const GCId undefined();  // Returns GCId(UNDEFINED).
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_GCID_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/gcTrace.cpp
--- a/src/share/vm/gc_implementation/shared/gcTrace.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/gcTrace.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
+#include "gc_implementation/shared/gcId.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/objectCountEventSender.hpp"
@@ -38,19 +39,14 @@
 #include "gc_implementation/g1/evacuationInfo.hpp"
 #endif
 
-#define assert_unset_gc_id() assert(_shared_gc_info.id() == SharedGCInfo::UNSET_GCID, "GC already started?")
-#define assert_set_gc_id() assert(_shared_gc_info.id() != SharedGCInfo::UNSET_GCID, "GC not started?")
-
-static GCId GCTracer_next_gc_id = 0;
-static GCId create_new_gc_id() {
-  return GCTracer_next_gc_id++;
-}
+#define assert_unset_gc_id() assert(_shared_gc_info.gc_id().is_undefined(), "GC already started?")
+#define assert_set_gc_id() assert(!_shared_gc_info.gc_id().is_undefined(), "GC not started?")
 
 void GCTracer::report_gc_start_impl(GCCause::Cause cause, const Ticks& timestamp) {
   assert_unset_gc_id();
 
-  GCId gc_id = create_new_gc_id();
-  _shared_gc_info.set_id(gc_id);
+  GCId gc_id = GCId::create();
+  _shared_gc_info.set_gc_id(gc_id);
   _shared_gc_info.set_cause(cause);
   _shared_gc_info.set_start_timestamp(timestamp);
 }
@@ -62,7 +58,7 @@
 }
 
 bool GCTracer::has_reported_gc_start() const {
-  return _shared_gc_info.id() != SharedGCInfo::UNSET_GCID;
+  return !_shared_gc_info.gc_id().is_undefined();
 }
 
 void GCTracer::report_gc_end_impl(const Ticks& timestamp, TimePartitions* time_partitions) {
@@ -81,7 +77,7 @@
 
   report_gc_end_impl(timestamp, time_partitions);
 
-  _shared_gc_info.set_id(SharedGCInfo::UNSET_GCID);
+  _shared_gc_info.set_gc_id(GCId::undefined());
 }
 
 void GCTracer::report_gc_reference_stats(const ReferenceProcessorStats& rps) const {
@@ -132,7 +128,7 @@
     if (!cit.allocation_failed()) {
       HeapInspection hi(false, false, false, NULL);
       hi.populate_table(&cit, is_alive_cl);
-      ObjectCountEventSenderClosure event_sender(_shared_gc_info.id(), cit.size_of_instances_in_words(), Ticks::now());
+      ObjectCountEventSenderClosure event_sender(_shared_gc_info.gc_id(), cit.size_of_instances_in_words(), Ticks::now());
       cit.iterate(&event_sender);
     }
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/gcTrace.hpp
--- a/src/share/vm/gc_implementation/shared/gcTrace.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/gcTrace.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -27,6 +27,7 @@
 
 #include "gc_interface/gcCause.hpp"
 #include "gc_interface/gcName.hpp"
+#include "gc_implementation/shared/gcId.hpp"
 #include "gc_implementation/shared/gcWhen.hpp"
 #include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "memory/allocation.hpp"
@@ -38,7 +39,6 @@
 #include "utilities/macros.hpp"
 #include "utilities/ticks.hpp"
 
-typedef uint GCId;
 
 class EvacuationInfo;
 class GCHeapSummary;
@@ -50,11 +50,8 @@
 class BoolObjectClosure;
 
 class SharedGCInfo VALUE_OBJ_CLASS_SPEC {
- public:
-  static const GCId UNSET_GCID = (GCId)-1;
-
  private:
-  GCId _id;
+  GCId _gc_id;
   GCName _name;
   GCCause::Cause _cause;
   Ticks     _start_timestamp;
@@ -64,7 +61,7 @@
 
  public:
   SharedGCInfo(GCName name) :
-    _id(UNSET_GCID),
+    _gc_id(GCId::undefined()),
     _name(name),
     _cause(GCCause::_last_gc_cause),
     _start_timestamp(),
@@ -73,8 +70,8 @@
     _longest_pause() {
   }
 
-  void set_id(GCId id) { _id = id; }
-  GCId id() const { return _id; }
+  void set_gc_id(GCId gc_id) { _gc_id = gc_id; }
+  const GCId& gc_id() const { return _gc_id; }
 
   void set_start_timestamp(const Ticks& timestamp) { _start_timestamp = timestamp; }
   const Ticks start_timestamp() const { return _start_timestamp; }
@@ -131,10 +128,11 @@
   void report_gc_reference_stats(const ReferenceProcessorStats& rp) const;
   void report_object_count_after_gc(BoolObjectClosure* object_filter) NOT_SERVICES_RETURN;
   bool has_reported_gc_start() const;
+  const GCId& gc_id() { return _shared_gc_info.gc_id(); }
 
  protected:
   GCTracer(GCName name) : _shared_gc_info(name) {}
-  virtual void report_gc_start_impl(GCCause::Cause cause, const Ticks& timestamp);
+  void report_gc_start_impl(GCCause::Cause cause, const Ticks& timestamp);
   virtual void report_gc_end_impl(const Ticks& timestamp, TimePartitions* time_partitions);
 
  private:
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/gcTraceSend.cpp
--- a/src/share/vm/gc_implementation/shared/gcTraceSend.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/gcTraceSend.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -43,7 +43,7 @@
 void GCTracer::send_garbage_collection_event() const {
   EventGCGarbageCollection event(UNTIMED);
   if (event.should_commit()) {
-    event.set_gcId(_shared_gc_info.id());
+    event.set_gcId(_shared_gc_info.gc_id().id());
     event.set_name(_shared_gc_info.name());
     event.set_cause((u2) _shared_gc_info.cause());
     event.set_sumOfPauses(_shared_gc_info.sum_of_pauses());
@@ -57,7 +57,7 @@
 void GCTracer::send_reference_stats_event(ReferenceType type, size_t count) const {
   EventGCReferenceStatistics e;
   if (e.should_commit()) {
-      e.set_gcId(_shared_gc_info.id());
+      e.set_gcId(_shared_gc_info.gc_id().id());
       e.set_type((u1)type);
       e.set_count(count);
       e.commit();
@@ -68,7 +68,7 @@
                                                       const MetaspaceChunkFreeListSummary& summary) const {
   EventMetaspaceChunkFreeListSummary e;
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_when(when);
     e.set_metadataType(mdtype);
 
@@ -91,7 +91,7 @@
 void ParallelOldTracer::send_parallel_old_event() const {
   EventGCParallelOld e(UNTIMED);
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_densePrefix((TraceAddress)_parallel_old_gc_info.dense_prefix());
     e.set_starttime(_shared_gc_info.start_timestamp());
     e.set_endtime(_shared_gc_info.end_timestamp());
@@ -102,7 +102,7 @@
 void YoungGCTracer::send_young_gc_event() const {
   EventGCYoungGarbageCollection e(UNTIMED);
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_tenuringThreshold(_tenuring_threshold);
     e.set_starttime(_shared_gc_info.start_timestamp());
     e.set_endtime(_shared_gc_info.end_timestamp());
@@ -113,7 +113,7 @@
 void OldGCTracer::send_old_gc_event() const {
   EventGCOldGarbageCollection e(UNTIMED);
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_starttime(_shared_gc_info.start_timestamp());
     e.set_endtime(_shared_gc_info.end_timestamp());
     e.commit();
@@ -132,7 +132,7 @@
 void YoungGCTracer::send_promotion_failed_event(const PromotionFailedInfo& pf_info) const {
   EventPromotionFailed e;
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_data(to_trace_struct(pf_info));
     e.set_thread(pf_info.thread()->thread_id());
     e.commit();
@@ -143,7 +143,7 @@
 void OldGCTracer::send_concurrent_mode_failure_event() {
   EventConcurrentModeFailure e;
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.commit();
   }
 }
@@ -152,7 +152,7 @@
 void G1NewTracer::send_g1_young_gc_event() {
   EventGCG1GarbageCollection e(UNTIMED);
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_type(_g1_young_gc_info.type());
     e.set_starttime(_shared_gc_info.start_timestamp());
     e.set_endtime(_shared_gc_info.end_timestamp());
@@ -163,7 +163,7 @@
 void G1NewTracer::send_evacuation_info_event(EvacuationInfo* info) {
   EventEvacuationInfo e;
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_cSetRegions(info->collectionset_regions());
     e.set_cSetUsedBefore(info->collectionset_used_before());
     e.set_cSetUsedAfter(info->collectionset_used_after());
@@ -179,7 +179,7 @@
 void G1NewTracer::send_evacuation_failed_event(const EvacuationFailedInfo& ef_info) const {
   EventEvacuationFailed e;
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_data(to_trace_struct(ef_info));
     e.commit();
   }
@@ -206,17 +206,17 @@
 }
 
 class GCHeapSummaryEventSender : public GCHeapSummaryVisitor {
-  GCId _id;
+  GCId _gc_id;
   GCWhen::Type _when;
  public:
-  GCHeapSummaryEventSender(GCId id, GCWhen::Type when) : _id(id), _when(when) {}
+  GCHeapSummaryEventSender(GCId gc_id, GCWhen::Type when) : _gc_id(gc_id), _when(when) {}
 
   void visit(const GCHeapSummary* heap_summary) const {
     const VirtualSpaceSummary& heap_space = heap_summary->heap();
 
     EventGCHeapSummary e;
     if (e.should_commit()) {
-      e.set_gcId(_id);
+      e.set_gcId(_gc_id.id());
       e.set_when((u1)_when);
       e.set_heapSpace(to_trace_struct(heap_space));
       e.set_heapUsed(heap_summary->used());
@@ -236,7 +236,7 @@
 
     EventPSHeapSummary e;
     if (e.should_commit()) {
-      e.set_gcId(_id);
+      e.set_gcId(_gc_id.id());
       e.set_when((u1)_when);
 
       e.set_oldSpace(to_trace_struct(ps_heap_summary->old()));
@@ -251,7 +251,7 @@
 };
 
 void GCTracer::send_gc_heap_summary_event(GCWhen::Type when, const GCHeapSummary& heap_summary) const {
-  GCHeapSummaryEventSender visitor(_shared_gc_info.id(), when);
+  GCHeapSummaryEventSender visitor(_shared_gc_info.gc_id(), when);
   heap_summary.accept(&visitor);
 }
 
@@ -268,7 +268,7 @@
 void GCTracer::send_meta_space_summary_event(GCWhen::Type when, const MetaspaceSummary& meta_space_summary) const {
   EventMetaspaceSummary e;
   if (e.should_commit()) {
-    e.set_gcId(_shared_gc_info.id());
+    e.set_gcId(_shared_gc_info.gc_id().id());
     e.set_when((u1) when);
     e.set_gcThreshold(meta_space_summary.capacity_until_GC());
     e.set_metaspace(to_trace_struct(meta_space_summary.meta_space()));
@@ -287,7 +287,7 @@
   void send_phase(PausePhase* pause) {
     T event(UNTIMED);
     if (event.should_commit()) {
-      event.set_gcId(_gc_id);
+      event.set_gcId(_gc_id.id());
       event.set_name(pause->name());
       event.set_starttime(pause->start());
       event.set_endtime(pause->end());
@@ -311,7 +311,7 @@
 };
 
 void GCTracer::send_phase_events(TimePartitions* time_partitions) const {
-  PhaseSender phase_reporter(_shared_gc_info.id());
+  PhaseSender phase_reporter(_shared_gc_info.gc_id());
 
   TimePartitionPhasesIterator iter(time_partitions);
   while (iter.has_next()) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/gcTraceTime.cpp
--- a/src/share/vm/gc_implementation/shared/gcTraceTime.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/gcTraceTime.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
+#include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/os.hpp"
@@ -34,7 +35,7 @@
 #include "utilities/ticks.inline.hpp"
 
 
-GCTraceTime::GCTraceTime(const char* title, bool doit, bool print_cr, GCTimer* timer) :
+GCTraceTime::GCTraceTime(const char* title, bool doit, bool print_cr, GCTimer* timer, GCId gc_id) :
     _title(title), _doit(doit), _print_cr(print_cr), _timer(timer), _start_counter() {
   if (_doit || _timer != NULL) {
     _start_counter.stamp();
@@ -52,6 +53,9 @@
       gclog_or_tty->stamp();
       gclog_or_tty->print(": ");
     }
+    if (PrintGCID) {
+      gclog_or_tty->print("#%u: ", gc_id.id());
+    }
     gclog_or_tty->print("[%s", title);
     gclog_or_tty->flush();
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/gcTraceTime.hpp
--- a/src/share/vm/gc_implementation/shared/gcTraceTime.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/gcTraceTime.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_GCTRACETIME_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_SHARED_GCTRACETIME_HPP
 
+#include "gc_implementation/shared/gcTrace.hpp"
 #include "prims/jni_md.h"
 #include "utilities/ticks.hpp"
 
@@ -38,7 +39,7 @@
   Ticks _start_counter;
 
  public:
-  GCTraceTime(const char* title, bool doit, bool print_cr, GCTimer* timer);
+  GCTraceTime(const char* title, bool doit, bool print_cr, GCTimer* timer, GCId gc_id);
   ~GCTraceTime();
 };
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/markSweep.cpp
--- a/src/share/vm/gc_implementation/shared/markSweep.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/markSweep.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -49,27 +49,19 @@
 SerialOldTracer*        MarkSweep::_gc_tracer       = NULL;
 
 MarkSweep::FollowRootClosure  MarkSweep::follow_root_closure;
-CodeBlobToOopClosure MarkSweep::follow_code_root_closure(&MarkSweep::follow_root_closure, /*do_marking=*/ true);
 
 void MarkSweep::FollowRootClosure::do_oop(oop* p)       { follow_root(p); }
 void MarkSweep::FollowRootClosure::do_oop(narrowOop* p) { follow_root(p); }
 
 MarkSweep::MarkAndPushClosure MarkSweep::mark_and_push_closure;
-MarkSweep::FollowKlassClosure MarkSweep::follow_klass_closure;
-MarkSweep::AdjustKlassClosure MarkSweep::adjust_klass_closure;
+CLDToOopClosure               MarkSweep::follow_cld_closure(&mark_and_push_closure);
+CLDToOopClosure               MarkSweep::adjust_cld_closure(&adjust_pointer_closure);
 
 void MarkSweep::MarkAndPushClosure::do_oop(oop* p)       { mark_and_push(p); }
 void MarkSweep::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(p); }
 
-void MarkSweep::FollowKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(&MarkSweep::mark_and_push_closure);
-}
-void MarkSweep::AdjustKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(&MarkSweep::adjust_pointer_closure);
-}
-
 void MarkSweep::follow_class_loader(ClassLoaderData* cld) {
-  cld->oops_do(&MarkSweep::mark_and_push_closure, &MarkSweep::follow_klass_closure, true);
+  MarkSweep::follow_cld_closure.do_cld(cld);
 }
 
 void MarkSweep::follow_stack() {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/markSweep.hpp
--- a/src/share/vm/gc_implementation/shared/markSweep.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/markSweep.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -65,17 +65,6 @@
     virtual void do_oop(narrowOop* p);
   };
 
-  // The one and only place to start following the classes.
-  // Should only be applied to the ClassLoaderData klasses list.
-  class FollowKlassClosure : public KlassClosure {
-   public:
-    void do_klass(Klass* klass);
-  };
-  class AdjustKlassClosure : public KlassClosure {
-   public:
-    void do_klass(Klass* klass);
-  };
-
   class FollowStackClosure: public VoidClosure {
    public:
     virtual void do_void();
@@ -143,12 +132,11 @@
   // Public closures
   static IsAliveClosure       is_alive;
   static FollowRootClosure    follow_root_closure;
-  static CodeBlobToOopClosure follow_code_root_closure; // => follow_root_closure
   static MarkAndPushClosure   mark_and_push_closure;
-  static FollowKlassClosure   follow_klass_closure;
   static FollowStackClosure   follow_stack_closure;
+  static CLDToOopClosure      follow_cld_closure;
   static AdjustPointerClosure adjust_pointer_closure;
-  static AdjustKlassClosure   adjust_klass_closure;
+  static CLDToOopClosure      adjust_cld_closure;
 
   // Accessors
   static uint total_invocations() { return _total_invocations; }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/objectCountEventSender.cpp
--- a/src/share/vm/gc_implementation/shared/objectCountEventSender.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/objectCountEventSender.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -24,6 +24,7 @@
 
 
 #include "precompiled.hpp"
+#include "gc_implementation/shared/gcId.hpp"
 #include "gc_implementation/shared/objectCountEventSender.hpp"
 #include "memory/heapInspection.hpp"
 #include "trace/tracing.hpp"
@@ -38,7 +39,7 @@
          "Only call this method if the event is enabled");
 
   EventObjectCountAfterGC event(UNTIMED);
-  event.set_gcId(gc_id);
+  event.set_gcId(gc_id.id());
   event.set_class(entry->klass());
   event.set_count(entry->count());
   event.set_totalSize(entry->words() * BytesPerWord);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp
--- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -24,7 +24,7 @@
 
 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-
+#include "gc_interface/collectedHeap.hpp"
 #include "memory/allocation.hpp"
 #include "memory/blockOffsetTable.hpp"
 #include "memory/threadLocalAllocBuffer.hpp"
@@ -60,6 +60,7 @@
   // Initializes the buffer to be empty, but with the given "word_sz".
   // Must get initialized with "set_buf" for an allocation to succeed.
   ParGCAllocBuffer(size_t word_sz);
+  virtual ~ParGCAllocBuffer() {}
 
   static const size_t min_size() {
     return ThreadLocalAllocBuffer::min_size();
@@ -83,6 +84,9 @@
     }
   }
 
+  // Allocate the object aligned to "alignment_in_bytes".
+  HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes);
+
   // Undo the last allocation in the buffer, which is required to be of the
   // "obj" of the given "word_sz".
   void undo_allocation(HeapWord* obj, size_t word_sz) {
@@ -113,7 +117,7 @@
   }
 
   // Sets the space of the buffer to be [buf, space+word_sz()).
-  void set_buf(HeapWord* buf) {
+  virtual void set_buf(HeapWord* buf) {
     _bottom   = buf;
     _top      = _bottom;
     _hard_end = _bottom + word_sz();
@@ -158,7 +162,7 @@
   // Fills in the unallocated portion of the buffer with a garbage object.
   // If "end_of_gc" is TRUE, is after the last use in the GC.  IF "retain"
   // is true, attempt to re-use the unused portion in the next GC.
-  void retire(bool end_of_gc, bool retain);
+  virtual void retire(bool end_of_gc, bool retain);
 
   void print() PRODUCT_RETURN;
 };
@@ -238,14 +242,14 @@
 
   void undo_allocation(HeapWord* obj, size_t word_sz);
 
-  void set_buf(HeapWord* buf_start) {
+  virtual void set_buf(HeapWord* buf_start) {
     ParGCAllocBuffer::set_buf(buf_start);
     _true_end = _hard_end;
     _bt.set_region(MemRegion(buf_start, word_sz()));
     _bt.initialize_threshold();
   }
 
-  void retire(bool end_of_gc, bool retain);
+  virtual void retire(bool end_of_gc, bool retain);
 
   MemRegion range() {
     return MemRegion(_top, _true_end);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* ParGCAllocBuffer::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) {
+  // NOTE(review): non-inline definition in a header; a second includer would define it twice - confirm.
+  HeapWord* res = CollectedHeap::align_allocation_or_fail(_top, _end, alignment_in_bytes);
+  if (res == NULL) {  // no suitably aligned address below _end
+    return NULL;
+  }
+
+  // Publish the aligned address as the new _top: allocate() expects _top
+  // to be correctly set before it is called.
+  _top = res;
+  return allocate(word_sz);
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/suspendibleThreadSet.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/suspendibleThreadSet.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/suspendibleThreadSet.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/thread.inline.hpp"
+
+uint   SuspendibleThreadSet::_nthreads          = 0;      // threads that called join() and not yet leave()
+uint   SuspendibleThreadSet::_nthreads_stopped  = 0;      // joined threads currently held inside yield()
+bool   SuspendibleThreadSet::_suspend_all       = false;  // true between synchronize() and desynchronize()
+double SuspendibleThreadSet::_suspend_all_start = 0.0;    // os::elapsedTime() at synchronize(); only set if ConcGCYieldTimeout > 0
+
+void SuspendibleThreadSet::join() {  // Adds the caller to the set, waiting out any suspension in progress.
+  MonitorLockerEx ml(STS_lock, Mutex::_no_safepoint_check_flag);
+  while (_suspend_all) {  // no new members while a suspension is in progress; woken by desynchronize()
+    ml.wait(Mutex::_no_safepoint_check_flag);
+  }
+  _nthreads++;  // from now on synchronize() waits for this thread as well
+}
+
+void SuspendibleThreadSet::leave() {  // Removes the caller from the set.
+  MonitorLockerEx ml(STS_lock, Mutex::_no_safepoint_check_flag);
+  assert(_nthreads > 0, "Invalid");
+  _nthreads--;
+  if (_suspend_all) {  // a waiting synchronize() has to re-check _nthreads_stopped against the new _nthreads
+    ml.notify_all();
+  }
+}
+
+void SuspendibleThreadSet::yield() {  // Blocks the caller while a suspension is in progress; otherwise returns at once.
+  if (_suspend_all) {  // unlocked pre-check; re-tested under STS_lock below
+    MonitorLockerEx ml(STS_lock, Mutex::_no_safepoint_check_flag);
+    if (_suspend_all) {
+      _nthreads_stopped++;
+      if (_nthreads_stopped == _nthreads) {  // this caller is the last joined thread to stop
+        if (ConcGCYieldTimeout > 0) {
+          double now = os::elapsedTime();  // compared with the timestamp taken in synchronize()
+          guarantee((now - _suspend_all_start) * 1000.0 < (double)ConcGCYieldTimeout, "Long delay");  // presumably seconds -> ms; confirm units
+        }
+      }
+      ml.notify_all();  // lets synchronize() re-check the stopped count
+      while (_suspend_all) {  // held here until desynchronize() clears the flag
+        ml.wait(Mutex::_no_safepoint_check_flag);
+      }
+      assert(_nthreads_stopped > 0, "Invalid");
+      _nthreads_stopped--;
+      ml.notify_all();
+    }
+  }
+}
+
+void SuspendibleThreadSet::synchronize() {  // Requests a suspension and blocks until every joined thread has stopped.
+  assert(Thread::current()->is_VM_thread(), "Must be the VM thread");
+  if (ConcGCYieldTimeout > 0) {
+    _suspend_all_start = os::elapsedTime();  // start time for the "Long delay" guarantee in yield(); taken before locking
+  }
+  MonitorLockerEx ml(STS_lock, Mutex::_no_safepoint_check_flag);
+  assert(!_suspend_all, "Only one at a time");
+  _suspend_all = true;
+  while (_nthreads_stopped < _nthreads) {  // re-checked after each notify_all() from yield() or leave()
+    ml.wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+void SuspendibleThreadSet::desynchronize() {  // Ends the suspension started by synchronize().
+  assert(Thread::current()->is_VM_thread(), "Must be the VM thread");
+  MonitorLockerEx ml(STS_lock, Mutex::_no_safepoint_check_flag);
+  assert(_nthreads_stopped == _nthreads, "Invalid");  // every joined thread is still held in yield()
+  _suspend_all = false;
+  ml.notify_all();  // releases the waiters in yield() and join()
+}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/suspendibleThreadSet.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/suspendibleThreadSet.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_SUSPENDIBLETHREADSET_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_SUSPENDIBLETHREADSET_HPP
+
+#include "memory/allocation.hpp"
+
+// A SuspendibleThreadSet is a set of threads that can be suspended.
+// A thread can join and later leave the set, and periodically yield.
+// If some thread (not in the set) requests, via synchronize(), that
+// the threads be suspended, then the requesting thread is blocked
+// until all the threads in the set have yielded or left the set. Threads
+// may not enter the set when an attempted suspension is in progress. The
+// suspending thread later calls desynchronize(), allowing the suspended
+// threads to continue.
+class SuspendibleThreadSet : public AllStatic {
+private:
+  static uint   _nthreads;           // number of threads currently in the set
+  static uint   _nthreads_stopped;   // number of those threads currently held in yield()
+  static bool   _suspend_all;        // true while a suspension is requested or in effect
+  static double _suspend_all_start;  // time at which the current suspension was requested
+
+public:
+  // Add the current thread to the set. May block if a suspension is in progress.
+  static void join();
+
+  // Removes the current thread from the set.
+  static void leave();
+
+  // Returns true if a suspension is in progress. Reads the flag without taking a lock.
+  static bool should_yield() { return _suspend_all; }
+
+  // Suspends the current thread if a suspension is in progress.
+  static void yield();
+
+  // Returns when all threads in the set are suspended.
+  static void synchronize();
+
+  // Resumes all suspended threads in the set.
+  static void desynchronize();
+};
+
+// Scoped membership in the SuspendibleThreadSet: constructing a joiner adds
+// the current thread to the set and destroying it removes the thread again.
+class SuspendibleThreadSetJoiner : public StackObj {
+public:
+  // Joins the set. SuspendibleThreadSet::join() may block if a suspension
+  // is in progress.
+  SuspendibleThreadSetJoiner() { SuspendibleThreadSet::join(); }
+
+  // Leaves the set when the joiner goes out of scope.
+  ~SuspendibleThreadSetJoiner() { SuspendibleThreadSet::leave(); }
+
+  // Forwards to SuspendibleThreadSet::should_yield().
+  bool should_yield() { return SuspendibleThreadSet::should_yield(); }
+
+  // Forwards to SuspendibleThreadSet::yield().
+  void yield() { SuspendibleThreadSet::yield(); }
+};
+
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_SUSPENDIBLETHREADSET_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/vmGCOperations.cpp
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -209,6 +209,45 @@
   gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level);
 }
 
+// Returns true iff concurrent GCs unload metadata (CMS or G1 with the matching class-unloading flag set).
+bool VM_CollectForMetadataAllocation::initiate_concurrent_GC() {
+#if INCLUDE_ALL_GCS
+  if (UseConcMarkSweepGC && CMSClassUnloadingEnabled) {
+    MetaspaceGC::set_should_concurrent_collect(true);  // only sets the flag; no collection is run here
+    return true;
+  }
+
+  if (UseG1GC && ClassUnloadingWithConcurrentMark) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    g1h->g1_policy()->set_initiate_conc_mark_if_possible();
+
+    GCCauseSetter x(g1h, _gc_cause);  // scoped object; presumably restores the previous cause on exit - confirm
+
+    // At this point we are supposed to start a concurrent cycle. We
+    // will do so if one is not already in progress.
+    bool should_start = g1h->g1_policy()->force_initial_mark_if_outside_cycle(_gc_cause);
+
+    if (should_start) {
+      double pause_target = g1h->g1_policy()->max_pause_time_ms();
+      g1h->do_collection_pause_at_safepoint(pause_target);
+    }
+    return true;  // returned even when no pause was started above
+  }
+#endif
+
+  return false;  // neither branch applied, or built without INCLUDE_ALL_GCS
+}
+
+// With Verbose && PrintGCDetails, prints the "full GC for Metaspace" line for CMS or G1.
+static void log_metaspace_alloc_failure_for_concurrent_GC() {
+  if (!Verbose || !PrintGCDetails) {
+    return;
+  }
+  const char* msg = UseConcMarkSweepGC ? "\nCMS full GC for Metaspace" :
+                    (UseG1GC ? "\nG1 full GC for Metaspace" : NULL);
+  if (msg != NULL) gclog_or_tty->print_cr("%s", msg);  // other collectors print nothing
+}
+
 void VM_CollectForMetadataAllocation::doit() {
   SvcGCMarker sgcm(SvcGCMarker::FULL);
 
@@ -220,54 +259,57 @@
   // a GC that freed space for the allocation.
   if (!MetadataAllocationFailALot) {
     _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-  }
-
-  if (_result == NULL) {
-    if (UseConcMarkSweepGC) {
-      if (CMSClassUnloadingEnabled) {
-        MetaspaceGC::set_should_concurrent_collect(true);
-      }
-      // For CMS expand since the collection is going to be concurrent.
-      _result =
-        _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
-    }
-    if (_result == NULL) {
-      // Don't clear the soft refs yet.
-      if (Verbose && PrintGCDetails && UseConcMarkSweepGC) {
-        gclog_or_tty->print_cr("\nCMS full GC for Metaspace");
-      }
-      heap->collect_as_vm_thread(GCCause::_metadata_GC_threshold);
-      // After a GC try to allocate without expanding.  Could fail
-      // and expansion will be tried below.
-      _result =
-        _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-    }
-    if (_result == NULL) {
-      // If still failing, allow the Metaspace to expand.
-      // See delta_capacity_until_GC() for explanation of the
-      // amount of the expansion.
-      // This should work unless there really is no more space
-      // or a MaxMetaspaceSize has been specified on the command line.
-      _result =
-        _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
-      if (_result == NULL) {
-        // If expansion failed, do a last-ditch collection and try allocating
-        // again.  A last-ditch collection will clear softrefs.  This
-        // behavior is similar to the last-ditch collection done for perm
-        // gen when it was full and a collection for failed allocation
-        // did not free perm gen space.
-        heap->collect_as_vm_thread(GCCause::_last_ditch_collection);
-        _result =
-          _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-      }
-    }
-    if (Verbose && PrintGCDetails && _result == NULL) {
-      gclog_or_tty->print_cr("\nAfter Metaspace GC failed to allocate size "
-                             SIZE_FORMAT, _size);
+    if (_result != NULL) {
+      return;
     }
   }
 
-  if (_result == NULL && GC_locker::is_active_and_needs_gc()) {
+  if (initiate_concurrent_GC()) {
+    // For CMS and G1 expand since the collection is going to be concurrent.
+    _result = _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
+    if (_result != NULL) {
+      return;
+    }
+
+    log_metaspace_alloc_failure_for_concurrent_GC();
+  }
+
+  // Don't clear the soft refs yet.
+  heap->collect_as_vm_thread(GCCause::_metadata_GC_threshold);
+  // After a GC try to allocate without expanding.  Could fail
+  // and expansion will be tried below.
+  _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  // If still failing, allow the Metaspace to expand.
+  // See delta_capacity_until_GC() for explanation of the
+  // amount of the expansion.
+  // This should work unless there really is no more space
+  // or a MaxMetaspaceSize has been specified on the command line.
+  _result = _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  // If expansion failed, do a last-ditch collection and try allocating
+  // again.  A last-ditch collection will clear softrefs.  This
+  // behavior is similar to the last-ditch collection done for perm
+  // gen when it was full and a collection for failed allocation
+  // did not free perm gen space.
+  heap->collect_as_vm_thread(GCCause::_last_ditch_collection);
+  _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  if (Verbose && PrintGCDetails) {
+    gclog_or_tty->print_cr("\nAfter Metaspace GC failed to allocate size "
+                           SIZE_FORMAT, _size);
+  }
+
+  if (GC_locker::is_active_and_needs_gc()) {
     set_gc_locked();
   }
 }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_implementation/shared/vmGCOperations.hpp
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -217,6 +217,8 @@
   virtual VMOp_Type type() const { return VMOp_CollectForMetadataAllocation; }
   virtual void doit();
   MetaWord* result() const       { return _result; }
+
+  bool initiate_concurrent_GC();
 };
 
 class SvcGCMarker : public StackObj {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_interface/collectedHeap.cpp
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -558,13 +558,13 @@
 
 void CollectedHeap::pre_full_gc_dump(GCTimer* timer) {
   if (HeapDumpBeforeFullGC) {
-    GCTraceTime tt("Heap Dump (before full gc): ", PrintGCDetails, false, timer);
+    GCTraceTime tt("Heap Dump (before full gc): ", PrintGCDetails, false, timer, GCId::create());
     // We are doing a "major" collection and a heap dump before
     // major collection has been requested.
     HeapDumper::dump_heap();
   }
   if (PrintClassHistogramBeforeFullGC) {
-    GCTraceTime tt("Class Histogram (before full gc): ", PrintGCDetails, true, timer);
+    GCTraceTime tt("Class Histogram (before full gc): ", PrintGCDetails, true, timer, GCId::create());
     VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */);
     inspector.doit();
   }
@@ -572,11 +572,11 @@
 
 void CollectedHeap::post_full_gc_dump(GCTimer* timer) {
   if (HeapDumpAfterFullGC) {
-    GCTraceTime tt("Heap Dump (after full gc): ", PrintGCDetails, false, timer);
+    GCTraceTime tt("Heap Dump (after full gc): ", PrintGCDetails, false, timer, GCId::create());
     HeapDumper::dump_heap();
   }
   if (PrintClassHistogramAfterFullGC) {
-    GCTraceTime tt("Class Histogram (after full gc): ", PrintGCDetails, true, timer);
+    GCTraceTime tt("Class Histogram (after full gc): ", PrintGCDetails, true, timer, GCId::create());
     VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */);
     inspector.doit();
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_interface/collectedHeap.hpp
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -351,6 +351,12 @@
     fill_with_object(start, pointer_delta(end, start), zap);
   }
 
+  // Return the address "addr" aligned by "alignment_in_bytes" if such
+  // an address is below "end".  Return NULL otherwise.
+  inline static HeapWord* align_allocation_or_fail(HeapWord* addr,
+                                                   HeapWord* end,
+                                                   unsigned short alignment_in_bytes);
+
   // Some heaps may offer a contiguous region for shared non-blocking
   // allocation, via inlined code (by exporting the address of the top and
   // end fields defining the extent of the contiguous allocation region.)
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/gc_interface/collectedHeap.inline.hpp
--- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -241,6 +241,44 @@
   oop_iterate(&no_header_cl);
 }
 
+
+// Aligns addr up to alignment_in_bytes, filling the skipped gap; NULL if the result is not below end.
+inline HeapWord* CollectedHeap::align_allocation_or_fail(HeapWord* addr,
+                                                         HeapWord* end,
+                                                         unsigned short alignment_in_bytes) {
+  if (alignment_in_bytes <= ObjectAlignmentInBytes) {
+    return addr;  // nothing to do beyond the default object alignment
+  }
+
+  assert(is_ptr_aligned(addr, HeapWordSize),
+    err_msg("Address " PTR_FORMAT " is not properly aligned.", p2i(addr)));
+  assert(is_size_aligned(alignment_in_bytes, HeapWordSize),
+    err_msg("Alignment size %u is incorrect.", alignment_in_bytes));
+
+  HeapWord* new_addr = (HeapWord*) align_pointer_up(addr, alignment_in_bytes);
+  size_t padding = pointer_delta(new_addr, addr);
+  if (padding == 0) {
+    return addr;  // already aligned, no filler needed
+  }
+
+  // The gap must hold a filler of at least min_fill_size() words; widen it by one alignment unit.
+  if (padding < CollectedHeap::min_fill_size()) {
+    padding += alignment_in_bytes / HeapWordSize;
+    assert(padding >= CollectedHeap::min_fill_size(),
+      err_msg("alignment_in_bytes %u is expected to be larger "
+      "than the minimum object size", alignment_in_bytes));
+    new_addr = addr + padding;
+  }
+
+  assert(new_addr > addr, err_msg("Unexpected arithmetic overflow "
+    PTR_FORMAT " not greater than " PTR_FORMAT, p2i(new_addr), p2i(addr)));
+  if (new_addr >= end) {
+    return NULL;  // aligned address is not below end; nothing has been filled
+  }
+  CollectedHeap::fill_with_object(addr, padding);
+  return new_addr;
+}
+
 #ifndef PRODUCT
 
 inline bool
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/interpreter/bytecodeInterpreter.cpp
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -41,43 +41,10 @@
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/threadCritical.hpp"
 #include "utilities/exceptions.hpp"
-#ifdef TARGET_OS_ARCH_linux_x86
-# include "orderAccess_linux_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_sparc
-# include "orderAccess_linux_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_zero
-# include "orderAccess_linux_zero.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_x86
-# include "orderAccess_solaris_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_sparc
-# include "orderAccess_solaris_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_windows_x86
-# include "orderAccess_windows_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_arm
-# include "orderAccess_linux_arm.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_ppc
-# include "orderAccess_linux_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_aix_ppc
-# include "orderAccess_aix_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_x86
-# include "orderAccess_bsd_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_zero
-# include "orderAccess_bsd_zero.inline.hpp"
-#endif
-
 
 // no precompiled headers
 #ifdef CC_INTERP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/cardTableModRefBS.cpp
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -429,7 +429,7 @@
                                                                  OopsInGenClosure* cl,
                                                                  CardTableRS* ct) {
   if (!mr.is_empty()) {
-    // Caller (process_strong_roots()) claims that all GC threads
+    // Caller (process_roots()) claims that all GC threads
     // execute this call.  With UseDynamicNumberOfGCThreads now all
     // active GC threads execute this call.  The number of active GC
     // threads needs to be passed to par_non_clean_card_iterate_work()
@@ -438,7 +438,7 @@
     // This is an example of where n_par_threads() is used instead
     // of workers()->active_workers().  n_par_threads can be set to 0 to
     // turn off parallelism.  For example when this code is called as
-    // part of verification and SharedHeap::process_strong_roots() is being
+    // part of verification and SharedHeap::process_roots() is being
     // used, then n_par_threads() may have been set to 0.  active_workers
     // is not overloaded with the meaning that it is a switch to disable
     // parallelism and so keeps the meaning of the number of
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/defNewGeneration.cpp
--- a/src/share/vm/memory/defNewGeneration.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/defNewGeneration.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -42,6 +42,7 @@
 #include "oops/instanceRefKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
+#include "runtime/prefetch.inline.hpp"
 #include "runtime/thread.inline.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/stack.inline.hpp"
@@ -584,7 +585,7 @@
 
   init_assuming_no_promotion_failure();
 
-  GCTraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL);
+  GCTraceTime t1(GCCauseString("GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL, gc_tracer.gc_id());
   // Capture heap used before collection (for printing).
   size_t gch_prev_used = gch->used();
 
@@ -612,6 +613,9 @@
 
   KlassScanClosure klass_scan_closure(&fsc_with_no_gc_barrier,
                                       gch->rem_set()->klass_rem_set());
+  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
+                                           &fsc_with_no_gc_barrier,
+                                           false);
 
   set_promo_failure_scan_stack_closure(&fsc_with_no_gc_barrier);
   FastEvacuateFollowersClosure evacuate_followers(gch, _level, this,
@@ -621,18 +625,15 @@
   assert(gch->no_allocs_since_save_marks(0),
          "save marks have not been newly set.");
 
-  int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
-
-  gch->gen_process_strong_roots(_level,
-                                true,  // Process younger gens, if any,
-                                       // as strong roots.
-                                true,  // activate StrongRootsScope
-                                true,  // is scavenging
-                                SharedHeap::ScanningOption(so),
-                                &fsc_with_no_gc_barrier,
-                                true,   // walk *all* scavengable nmethods
-                                &fsc_with_gc_barrier,
-                                &klass_scan_closure);
+  gch->gen_process_roots(_level,
+                         true,  // Process younger gens, if any,
+                                // as strong roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &fsc_with_no_gc_barrier,
+                         &fsc_with_gc_barrier,
+                         &cld_scan_closure);
 
   // "evacuate followers".
   evacuate_followers.do_void();
@@ -642,7 +643,7 @@
   rp->setup_policy(clear_all_soft_refs);
   const ReferenceProcessorStats& stats =
   rp->process_discovered_references(&is_alive, &keep_alive, &evacuate_followers,
-                                    NULL, _gc_timer);
+                                    NULL, _gc_timer, gc_tracer.gc_id());
   gc_tracer.report_gc_reference_stats(stats);
 
   if (!_promotion_failed) {
@@ -788,7 +789,7 @@
 
   // Try allocating obj in to-space (unless too old)
   if (old->age() < tenuring_threshold()) {
-    obj = (oop) to()->allocate(s);
+    obj = (oop) to()->allocate_aligned(s);
   }
 
   // Otherwise try allocating obj tenured
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/gcLocker.cpp
--- a/src/share/vm/memory/gcLocker.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/gcLocker.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,6 +26,7 @@
 #include "memory/gcLocker.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/sharedHeap.hpp"
+#include "runtime/thread.inline.hpp"
 
 volatile jint GC_locker::_jni_lock_count = 0;
 volatile bool GC_locker::_needs_gc       = false;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/genCollectedHeap.cpp
--- a/src/share/vm/memory/genCollectedHeap.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "classfile/vmSymbols.hpp"
 #include "code/icBuffer.hpp"
 #include "gc_implementation/shared/collectorCounters.hpp"
+#include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
@@ -60,8 +61,8 @@
 GenCollectedHeap* GenCollectedHeap::_gch;
 NOT_PRODUCT(size_t GenCollectedHeap::_skip_header_HeapWords = 0;)
 
-// The set of potentially parallel tasks in strong root scanning.
-enum GCH_process_strong_roots_tasks {
+// The set of potentially parallel tasks in root scanning.
+enum GCH_strong_roots_tasks {
   // We probably want to parallelize both of these internally, but for now...
   GCH_PS_younger_gens,
   // Leave this one last.
@@ -71,11 +72,11 @@
 GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) :
   SharedHeap(policy),
   _gen_policy(policy),
-  _gen_process_strong_tasks(new SubTasksDone(GCH_PS_NumElements)),
+  _gen_process_roots_tasks(new SubTasksDone(GCH_PS_NumElements)),
   _full_collections_completed(0)
 {
-  if (_gen_process_strong_tasks == NULL ||
-      !_gen_process_strong_tasks->valid()) {
+  if (_gen_process_roots_tasks == NULL ||
+      !_gen_process_roots_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
   }
   assert(policy != NULL, "Sanity check");
@@ -385,7 +386,9 @@
     const char* gc_cause_prefix = complete ? "Full GC" : "GC";
     gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
-    GCTraceTime t(GCCauseString(gc_cause_prefix, gc_cause()), PrintGCDetails, false, NULL);
+    // The PrintGCDetails logging starts before we have incremented the GC id. We will do that later
+    // so we can assume here that the next GC id is what we want.
+    GCTraceTime t(GCCauseString(gc_cause_prefix, gc_cause()), PrintGCDetails, false, NULL, GCId::peek());
 
     gc_prologue(complete);
     increment_total_collections(complete);
@@ -418,7 +421,9 @@
         }
         // Timer for individual generations. Last argument is false: no CR
         // FIXME: We should try to start the timing earlier to cover more of the GC pause
-        GCTraceTime t1(_gens[i]->short_name(), PrintGCDetails, false, NULL);
+        // The PrintGCDetails logging starts before we have incremented the GC id. We will do that later
+        // so we can assume here that the next GC id is what we want.
+        GCTraceTime t1(_gens[i]->short_name(), PrintGCDetails, false, NULL, GCId::peek());
         TraceCollectorStats tcs(_gens[i]->counters());
         TraceMemoryManagerStats tmms(_gens[i]->kind(),gc_cause());
 
@@ -585,33 +590,29 @@
 
 void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
-  _gen_process_strong_tasks->set_n_threads(t);
+  _gen_process_roots_tasks->set_n_threads(t);
 }
 
 void GenCollectedHeap::
-gen_process_strong_roots(int level,
-                         bool younger_gens_as_roots,
-                         bool activate_scope,
-                         bool is_scavenging,
-                         SharedHeap::ScanningOption so,
-                         OopsInGenClosure* not_older_gens,
-                         bool do_code_roots,
-                         OopsInGenClosure* older_gens,
-                         KlassClosure* klass_closure) {
-  // General strong roots.
+gen_process_roots(int level,
+                  bool younger_gens_as_roots,
+                  bool activate_scope,
+                  SharedHeap::ScanningOption so,
+                  OopsInGenClosure* not_older_gens,
+                  OopsInGenClosure* weak_roots,
+                  OopsInGenClosure* older_gens,
+                  CLDClosure* cld_closure,
+                  CLDClosure* weak_cld_closure,
+                  CodeBlobClosure* code_closure) {
 
-  if (!do_code_roots) {
-    SharedHeap::process_strong_roots(activate_scope, is_scavenging, so,
-                                     not_older_gens, NULL, klass_closure);
-  } else {
-    bool do_code_marking = (activate_scope || nmethod::oops_do_marking_is_active());
-    CodeBlobToOopClosure code_roots(not_older_gens, /*do_marking=*/ do_code_marking);
-    SharedHeap::process_strong_roots(activate_scope, is_scavenging, so,
-                                     not_older_gens, &code_roots, klass_closure);
-  }
+  // General roots.
+  SharedHeap::process_roots(activate_scope, so,
+                            not_older_gens, weak_roots,
+                            cld_closure, weak_cld_closure,
+                            code_closure);
 
   if (younger_gens_as_roots) {
-    if (!_gen_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
+    if (!_gen_process_roots_tasks->is_task_claimed(GCH_PS_younger_gens)) {
       for (int i = 0; i < level; i++) {
         not_older_gens->set_generation(_gens[i]);
         _gens[i]->oop_iterate(not_older_gens);
@@ -627,12 +628,42 @@
     older_gens->reset_generation();
   }
 
-  _gen_process_strong_tasks->all_tasks_completed();
+  _gen_process_roots_tasks->all_tasks_completed();
 }
 
-void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure,
-                                              CodeBlobClosure* code_roots) {
-  SharedHeap::process_weak_roots(root_closure, code_roots);
+void GenCollectedHeap::
+gen_process_roots(int level,
+                  bool younger_gens_as_roots,
+                  bool activate_scope,
+                  SharedHeap::ScanningOption so,
+                  bool only_strong_roots,
+                  OopsInGenClosure* not_older_gens,
+                  OopsInGenClosure* older_gens,
+                  CLDClosure* cld_closure) {
+
+  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
+
+  bool is_moving_collection = false;
+  if (level == 0 || is_adjust_phase) {
+    // young collections are always moving
+    is_moving_collection = true;
+  }
+
+  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
+  CodeBlobClosure* code_closure = &mark_code_closure;
+
+  gen_process_roots(level,
+                    younger_gens_as_roots,
+                    activate_scope, so,
+                    not_older_gens, only_strong_roots ? NULL : not_older_gens,
+                    older_gens,
+                    cld_closure, only_strong_roots ? NULL : cld_closure,
+                    code_closure);
+
+}
+
+void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure) {
+  SharedHeap::process_weak_roots(root_closure);
   // "Local" "weak" refs
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->ref_processor()->weak_oops_do(root_closure);
@@ -851,12 +882,6 @@
   }
 }
 
-void GenCollectedHeap::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  for (int i = 0; i < _n_gens; i++) {
-    _gens[i]->oop_iterate(mr, cl);
-  }
-}
-
 void GenCollectedHeap::object_iterate(ObjectClosure* cl) {
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->object_iterate(cl);
@@ -1074,7 +1099,7 @@
   guarantee(_n_gens = 2, "Wrong number of generations");
   Generation* old_gen = _gens[1];
   // Start by compacting into same gen.
-  CompactPoint cp(old_gen, NULL, NULL);
+  CompactPoint cp(old_gen);
   old_gen->prepare_for_compaction(&cp);
   Generation* young_gen = _gens[0];
   young_gen->prepare_for_compaction(&cp);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/genCollectedHeap.hpp
--- a/src/share/vm/memory/genCollectedHeap.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -78,9 +78,9 @@
   unsigned int _full_collections_completed;
 
   // Data structure for claiming the (potentially) parallel tasks in
-  // (gen-specific) strong roots processing.
-  SubTasksDone* _gen_process_strong_tasks;
-  SubTasksDone* gen_process_strong_tasks() { return _gen_process_strong_tasks; }
+  // (gen-specific) roots processing.
+  SubTasksDone* _gen_process_roots_tasks;
+  SubTasksDone* gen_process_roots_tasks() { return _gen_process_roots_tasks; }
 
   // In block contents verification, the number of header words to skip
   NOT_PRODUCT(static size_t _skip_header_HeapWords;)
@@ -220,7 +220,6 @@
 
   // Iteration functions.
   void oop_iterate(ExtendedOopClosure* cl);
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void object_iterate(ObjectClosure* cl);
   void safe_object_iterate(ObjectClosure* cl);
   Space* space_containing(const void* addr) const;
@@ -412,26 +411,35 @@
   // The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
-  // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
-  // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Strings" applies the closure to all entries in the StringTable.
-  void gen_process_strong_roots(int level,
-                                bool younger_gens_as_roots,
-                                // The remaining arguments are in an order
-                                // consistent with SharedHeap::process_strong_roots:
-                                bool activate_scope,
-                                bool is_scavenging,
-                                SharedHeap::ScanningOption so,
-                                OopsInGenClosure* not_older_gens,
-                                bool do_code_roots,
-                                OopsInGenClosure* older_gens,
-                                KlassClosure* klass_closure);
+ private:
+  void gen_process_roots(int level,
+                         bool younger_gens_as_roots,
+                         bool activate_scope,
+                         SharedHeap::ScanningOption so,
+                         OopsInGenClosure* not_older_gens,
+                         OopsInGenClosure* weak_roots,
+                         OopsInGenClosure* older_gens,
+                         CLDClosure* cld_closure,
+                         CLDClosure* weak_cld_closure,
+                         CodeBlobClosure* code_closure);
 
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table, and referents of reachable weak refs.
-  void gen_process_weak_roots(OopClosure* root_closure,
-                              CodeBlobClosure* code_roots);
+ public:
+  static const bool StrongAndWeakRoots = false;
+  static const bool StrongRootsOnly    = true;
+
+  void gen_process_roots(int level,
+                         bool younger_gens_as_roots,
+                         bool activate_scope,
+                         SharedHeap::ScanningOption so,
+                         bool only_strong_roots,
+                         OopsInGenClosure* not_older_gens,
+                         OopsInGenClosure* older_gens,
+                         CLDClosure* cld_closure);
+
+  // Apply "root_closure" to all the weak roots of the system.
+  // These include JNI weak roots, string table,
+  // and referents of reachable weak refs.
+  void gen_process_weak_roots(OopClosure* root_closure);
 
   // Set the saved marks of generations, if that makes sense.
   // In particular, if any generation might iterate over the oops
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/genMarkSweep.cpp
--- a/src/share/vm/memory/genMarkSweep.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/genMarkSweep.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -69,7 +69,7 @@
   _ref_processor = rp;
   rp->setup_policy(clear_all_softrefs);
 
-  GCTraceTime t1(GCCauseString("Full GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL);
+  GCTraceTime t1(GCCauseString("Full GC", gch->gc_cause()), PrintGC && !PrintGCDetails, true, NULL, _gc_tracer->gc_id());
 
   gch->trace_heap_before_gc(_gc_tracer);
 
@@ -193,7 +193,7 @@
 void GenMarkSweep::mark_sweep_phase1(int level,
                                   bool clear_all_softrefs) {
   // Recursively traverse all live objects and mark them
-  GCTraceTime tm("phase 1", PrintGC && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 1", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace(" 1");
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
@@ -207,22 +207,21 @@
   // Need new claim bits before marking starts.
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  gch->gen_process_strong_roots(level,
-                                false, // Younger gens are not roots.
-                                true,  // activate StrongRootsScope
-                                false, // not scavenging
-                                SharedHeap::SO_SystemClasses,
-                                &follow_root_closure,
-                                true,   // walk code active on stacks
-                                &follow_root_closure,
-                                &follow_klass_closure);
+  gch->gen_process_roots(level,
+                         false, // Younger gens are not roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_None,
+                         GenCollectedHeap::StrongRootsOnly,
+                         &follow_root_closure,
+                         &follow_root_closure,
+                         &follow_cld_closure);
 
   // Process reference objects found during marking
   {
     ref_processor()->setup_policy(clear_all_softrefs);
     const ReferenceProcessorStats& stats =
       ref_processor()->process_discovered_references(
-        &is_alive, &keep_alive, &follow_stack_closure, NULL, _gc_timer);
+        &is_alive, &keep_alive, &follow_stack_closure, NULL, _gc_timer, _gc_tracer->gc_id());
     gc_tracer()->report_gc_reference_stats(stats);
   }
 
@@ -264,7 +263,7 @@
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  GCTraceTime tm("phase 2", PrintGC && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 2", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace("2");
 
   gch->prepare_for_compaction();
@@ -281,7 +280,7 @@
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
   // Adjust the pointers to reflect the new locations
-  GCTraceTime tm("phase 3", PrintGC && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 3", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace("3");
 
   // Need new claim bits for the pointer adjustment tracing.
@@ -293,22 +292,16 @@
   // are run.
   adjust_pointer_closure.set_orig_generation(gch->get_gen(level));
 
-  gch->gen_process_strong_roots(level,
-                                false, // Younger gens are not roots.
-                                true,  // activate StrongRootsScope
-                                false, // not scavenging
-                                SharedHeap::SO_AllClasses,
-                                &adjust_pointer_closure,
-                                false, // do not walk code
-                                &adjust_pointer_closure,
-                                &adjust_klass_closure);
+  gch->gen_process_roots(level,
+                         false, // Younger gens are not roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_AllCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &adjust_pointer_closure,
+                         &adjust_pointer_closure,
+                         &adjust_cld_closure);
 
-  // Now adjust pointers in remaining weak roots.  (All of which should
-  // have been cleared if they pointed to non-surviving objects.)
-  CodeBlobToOopClosure adjust_code_pointer_closure(&adjust_pointer_closure,
-                                                   /*do_marking=*/ false);
-  gch->gen_process_weak_roots(&adjust_pointer_closure,
-                              &adjust_code_pointer_closure);
+  gch->gen_process_weak_roots(&adjust_pointer_closure);
 
   adjust_marks();
   GenAdjustPointersClosure blk;
@@ -336,7 +329,7 @@
   // to use a higher index (saved from phase2) when verifying perm_gen.
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  GCTraceTime tm("phase 4", PrintGC && Verbose, true, _gc_timer);
+  GCTraceTime tm("phase 4", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id());
   trace("4");
 
   GenCompactClosure blk;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/generation.cpp
--- a/src/share/vm/memory/generation.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/generation.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -297,22 +297,16 @@
 
 class GenerationOopIterateClosure : public SpaceClosure {
  public:
-  ExtendedOopClosure* cl;
-  MemRegion mr;
+  ExtendedOopClosure* _cl;
   virtual void do_space(Space* s) {
-    s->oop_iterate(mr, cl);
+    s->oop_iterate(_cl);
   }
-  GenerationOopIterateClosure(ExtendedOopClosure* _cl, MemRegion _mr) :
-    cl(_cl), mr(_mr) {}
+  GenerationOopIterateClosure(ExtendedOopClosure* cl) :
+    _cl(cl) {}
 };
 
 void Generation::oop_iterate(ExtendedOopClosure* cl) {
-  GenerationOopIterateClosure blk(cl, _reserved);
-  space_iterate(&blk);
-}
-
-void Generation::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  GenerationOopIterateClosure blk(cl, mr);
+  GenerationOopIterateClosure blk(cl);
   space_iterate(&blk);
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/generation.hpp
--- a/src/share/vm/memory/generation.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/generation.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -543,10 +543,6 @@
   // generation, calling "cl.do_oop" on each.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to the intersection of a memory region and
-  // the generation.
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // Iterate over all objects in the generation, calling "cl.do_object" on
   // each.
   virtual void object_iterate(ObjectClosure* cl);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/guardedMemory.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/guardedMemory.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/guardedMemory.hpp"
+#include "runtime/os.hpp"
+
+// Returns a guarded, malloc'ed copy of "len" bytes at "ptr" (free with free_copy()); NULL on OOM.
+void* GuardedMemory::wrap_copy(const void* ptr, const size_t len, const void* tag) {
+  void* outerp = os::malloc(GuardedMemory::get_total_size(len), mtInternal);
+  if (outerp != NULL) {
+    GuardedMemory guarded(outerp, len, tag);
+    void* innerp = guarded.get_user_ptr();
+    if (ptr != NULL) { memcpy(innerp, ptr, len); } // NULL source (e.g. len == 0) is undefined for memcpy
+    return innerp;
+  }
+  return NULL; // OOM
+}
+
+// Verifies the guards around a wrap_copy() block, then frees it; NULL is accepted as a no-op.
+// Returns false when a guard was found broken (the memory is released regardless).
+bool GuardedMemory::free_copy(void* p) {
+  if (p != NULL) {
+    GuardedMemory wrapper((u_char*)p);
+    const bool guards_intact = wrapper.verify_guards();
+    // Free even on failure so that any nested memory checker sees the problem too.
+    os::free(wrapper.release_for_freeing());
+    return guards_intact;
+  }
+  return true;
+}
+
+void GuardedMemory::print_on(outputStream* st) const {
+  if (_base_addr == NULL) {
+    st->print_cr("GuardedMemory(" PTR_FORMAT ") not associated to any memory", p2i(this));
+    return;
+  }
+  st->print_cr("GuardedMemory(" PTR_FORMAT ") base_addr=" PTR_FORMAT
+      " tag=" PTR_FORMAT " user_size=" SIZE_FORMAT " user_data=" PTR_FORMAT,
+      p2i(this), p2i(_base_addr), p2i(get_tag()), get_user_size(), p2i(get_user_ptr()));
+
+  Guard* guard = get_head_guard();
+  st->print_cr("  Header guard @" PTR_FORMAT " is %s", p2i(guard), (guard->verify() ? "OK" : "BROKEN"));
+  guard = get_tail_guard();
+  st->print_cr("  Trailer guard @" PTR_FORMAT " is %s", p2i(guard), (guard->verify() ? "OK" : "BROKEN"));
+
+  u_char udata = *get_user_ptr();
+  switch (udata) {
+  case uninitBlockPad:
+    st->print_cr("  User data appears unused");
+    break;
+  case freeBlockPad:
+    st->print_cr("  User data appears to have been freed");
+    break;
+  default:
+    st->print_cr("  User data appears to be in use");
+    break;
+  }
+}
+
+// test code...
+
+#ifndef PRODUCT
+
+static void guarded_memory_test_check(void* p, size_t sz, void* tag) {
+  assert(p != NULL, "NULL pointer given to check");
+  u_char* c = (u_char*) p;
+  GuardedMemory guarded(c);
+  assert(guarded.get_tag() == tag, "Tag is not the same as supplied");
+  assert(guarded.get_user_ptr() == c, "User pointer is not the same as supplied");
+  assert(guarded.get_user_size() == sz, "User size is not the same as supplied");
+  assert(guarded.verify_guards(), "Guard broken");
+}
+
+void GuardedMemory::test_guarded_memory() {
+  // Test the basic characteristics...
+  size_t total_sz = GuardedMemory::get_total_size(1);
+  assert(total_sz > 1 && total_sz >= (sizeof(GuardHeader) + 1 + sizeof(Guard)), "Unexpected size");
+  u_char* basep = (u_char*) os::malloc(total_sz, mtInternal);
+
+  GuardedMemory guarded(basep, 1, (void*)0xf000f000);
+
+  assert(*basep == badResourceValue, "Expected guard in the form of badResourceValue");
+  u_char* userp = guarded.get_user_ptr();
+  assert(*userp == uninitBlockPad, "Expected uninitialized data in the form of uninitBlockPad");
+  guarded_memory_test_check(userp, 1, (void*)0xf000f000);
+
+  void* freep = guarded.release_for_freeing();
+  assert((u_char*)freep == basep, "Expected the same pointer guard was ");
+  assert(*userp == freeBlockPad, "Expected user data to be free block padded");
+  assert(!guarded.verify_guards(), "Expected failed");
+  os::free(freep);
+
+  // Test a number of odd sizes...
+  size_t sz = 0;
+  do {
+    void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal);
+    void* up = guarded.wrap_with_guards(p, sz, (void*)1);
+    memset(up, 0, sz);
+    guarded_memory_test_check(up, sz, (void*)1);
+    os::free(guarded.release_for_freeing());
+    sz = (sz << 4) + 1;
+  } while (sz < (256 * 1024));
+
+  // Test buffer overrun into head...
+  basep = (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal);
+  guarded.wrap_with_guards(basep, 1);
+  *basep = 0;
+  assert(!guarded.verify_guards(), "Expected failure");
+  os::free(basep);
+
+  // Test buffer overrun into tail with a number of odd sizes...
+  sz = 1;
+  do {
+    void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal);
+    void* up = guarded.wrap_with_guards(p, sz, (void*)1);
+    memset(up, 0, sz + 1); // Buffer-overwrite (within guard)
+    assert(!guarded.verify_guards(), "Guard was not broken as expected");
+    os::free(guarded.release_for_freeing());
+    sz = (sz << 4) + 1;
+  } while (sz < (256 * 1024));
+
+  // Test wrap_copy/free_copy...
+  assert(GuardedMemory::free_copy(NULL), "Expected free NULL to be OK");
+
+  const char* str = "Check my bounds out";
+  size_t str_sz = strlen(str) + 1;
+  char* str_copy = (char*) GuardedMemory::wrap_copy(str, str_sz);
+  guarded_memory_test_check(str_copy, str_sz, NULL);
+  assert(strcmp(str, str_copy) == 0, "Not identical copy");
+  assert(GuardedMemory::free_copy(str_copy), "Free copy failed to verify");
+
+  void* no_data = NULL;
+  void* no_data_copy = GuardedMemory::wrap_copy(no_data, 0);
+  assert(GuardedMemory::free_copy(no_data_copy), "Expected valid guards even for no data copy");
+}
+
+#endif // !PRODUCT
+
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/guardedMemory.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/guardedMemory.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
+#define SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+/**
+ * Guarded memory for detecting buffer overrun.
+ *
+ * Allows allocations to be wrapped with padded bytes of a known byte pattern,
+ * that is a "guard". Guard patterns may be verified to detect buffer overruns.
+ *
+ * Primarily used by "debug malloc" and "checked JNI".
+ *
+ * Memory layout:
+ *
+ * |Offset             | Content              | Description    |
+ * |------------------------------------------------------------
+ * |base_addr          | 0xABABABABABABABAB   | Head guard     |
+ * |+16                | <size_t:user_size>   | User data size |
+ * |+sizeof(uintptr_t) | <tag>                | Tag word       |
+ * |+sizeof(void*)     | 0xF1 <user_data>     | User data      |
+ * |+user_size         | 0xABABABABABABABAB   | Tail guard     |
+ * -------------------------------------------------------------
+ *
+ * Where:
+ *  - guard padding uses "badResourceValue" (0xAB)
+ *  - tag word is general purpose
+ *  - user data
+ *    -- initially padded with "uninitBlockPad" (0xF1),
+ *    -- to "freeBlockPad" (0xBA), when freed
+ *
+ * Usage:
+ *
+ * * Allocations: one may wrap allocations with guard memory:
+ * <code>
+ *   Thing* alloc_thing() {
+ *     void* mem = user_alloc_fn(GuardedMemory::get_total_size(sizeof(Thing)));
+ *     GuardedMemory guarded(mem, sizeof(Thing));
+ *     return (Thing*) guarded.get_user_ptr();
+ *   }
+ * </code>
+ * * Verify: memory guards are still intact
+ * <code>
+ *   bool verify_thing(Thing* thing) {
+ *     GuardedMemory guarded((void*)thing);
+ *     return guarded.verify_guards();
+ *   }
+ * </code>
+ * * Free: one may mark bytes as freed (further debugging support)
+ * <code>
+ *   void free_thing(Thing* thing) {
+ *    GuardedMemory guarded((void*)thing);
+ *    assert(guarded.verify_guards(), "Corrupt thing");
+ *    user_free_fn(guarded.release_for_freeing());
+ *   }
+ * </code>
+ */
+class GuardedMemory : StackObj { // Wrapper on stack
+
+  // Private inner classes for memory layout...
+
+protected:
+
+  /**
+   * Guard class for header and trailer known pattern to test for overwrites.
+   */
+  class Guard { // Class for raw memory (no vtbl allowed)
+    friend class GuardedMemory;
+   protected:
+    enum {
+      GUARD_SIZE = 16
+    };
+
+    u_char _guard[GUARD_SIZE];
+
+   public:
+
+    void build() {
+      u_char* c = _guard; // Possibly unaligned if tail guard
+      u_char* end = c + GUARD_SIZE;
+      while (c < end) {
+        *c = badResourceValue;
+        c++;
+      }
+    }
+
+    bool verify() const {
+      u_char* c = (u_char*) _guard;
+      u_char* end = c + GUARD_SIZE;
+      while (c < end) {
+        if (*c != badResourceValue) {
+          return false;
+        }
+        c++;
+      }
+      return true;
+    }
+
+  }; // GuardedMemory::Guard
+
+  /**
+   * Header guard and size
+   */
+  class GuardHeader : Guard {
+    friend class GuardedMemory;
+   protected:
+    // Take care in modifying fields here, will affect alignment
+    // e.g. x86 ABI 16 byte stack alignment
+    union {
+      uintptr_t __unused_full_word1;
+      size_t _user_size;
+    };
+    void* _tag;
+   public:
+    void set_user_size(const size_t usz) { _user_size = usz; }
+    size_t get_user_size() const { return _user_size; }
+
+    void set_tag(const void* tag) { _tag = (void*) tag; }
+    void* get_tag() const { return _tag; }
+
+  }; // GuardedMemory::GuardHeader
+
+  // Guarded Memory...
+
+ protected:
+  u_char* _base_addr;
+
+ public:
+
+  /**
+   * Create new guarded memory.
+   *
+   * Wraps, starting at the given "base_ptr" with guards. Use "get_user_ptr()"
+   * to return a pointer suitable for user data.
+   *
+   * @param base_ptr  allocation wishing to be wrapped, must be at least "GuardedMemory::get_total_size()" bytes.
+   * @param user_size the size of the user data to be wrapped.
+   * @param tag       optional general purpose tag.
+   */
+  GuardedMemory(void* base_ptr, const size_t user_size, const void* tag = NULL) {
+    wrap_with_guards(base_ptr, user_size, tag);
+  }
+
+  /**
+   * Wrap existing guarded memory.
+   *
+   * To use this constructor, one must have created guarded memory with
+   * "GuardedMemory(void*, size_t, void*)" (or indirectly via helper, e.g. "wrap_copy()").
+   *
+   * @param userp  existing wrapped memory.
+   */
+  GuardedMemory(void* userp) {
+    u_char* user_ptr = (u_char*) userp;
+    assert((uintptr_t)user_ptr > (sizeof(GuardHeader) + 0x1000), "Invalid pointer");
+    _base_addr = (user_ptr - sizeof(GuardHeader));
+  }
+
+  /**
+   * Create new guarded memory.
+   *
+   * Wraps, starting at the given "base_ptr" with guards. Allows reuse of stack allocated helper.
+   *
+   * @param base_ptr  allocation wishing to be wrapped, must be at least "GuardedMemory::get_total_size()" bytes.
+   * @param user_size the size of the user data to be wrapped.
+   * @param tag       optional general purpose tag.
+   *
+   * @return user data pointer (inner pointer to supplied "base_ptr").
+   */
+  void* wrap_with_guards(void* base_ptr, size_t user_size, const void* tag = NULL) {
+    assert(base_ptr != NULL, "Attempt to wrap NULL with memory guard");
+    _base_addr = (u_char*)base_ptr;
+    get_head_guard()->build();
+    get_head_guard()->set_user_size(user_size);
+    get_tail_guard()->build();
+    set_tag(tag);
+    set_user_bytes(uninitBlockPad);
+    assert(verify_guards(), "Expected valid memory guards");
+    return get_user_ptr();
+  }
+
+  /**
+   * Verify head and tail guards.
+   *
+   * @return true if guards are intact, false would indicate a buffer overrun.
+   */
+  bool verify_guards() const {
+    if (_base_addr != NULL) {
+      return (get_head_guard()->verify() && get_tail_guard()->verify());
+    }
+    return false;
+  }
+
+  /**
+   * Set the general purpose tag.
+   *
+   * @param tag general purpose tag.
+   */
+  void set_tag(const void* tag) { get_head_guard()->set_tag(tag); }
+
+  /**
+   * Return the general purpose tag.
+   *
+   * @return the general purpose tag, defaults to NULL.
+   */
+  void* get_tag() const { return get_head_guard()->get_tag(); }
+
+  /**
+   * Return the size of the user data.
+   *
+   * @return the size of the user data.
+   */
+  size_t get_user_size() const {
+    assert(_base_addr, "Not wrapping any memory");
+    return get_head_guard()->get_user_size();
+  }
+
+  /**
+   * Return the user data pointer.
+   *
+   * @return the user data pointer.
+   */
+  u_char* get_user_ptr() const {
+    assert(_base_addr, "Not wrapping any memory");
+    return _base_addr + sizeof(GuardHeader);
+  }
+
+  /**
+   * Release the wrapped pointer for resource freeing.
+   *
+   * Pads the user data with "freeBlockPad", and dis-associates the helper.
+   *
+   * @return the original base pointer used to wrap the data.
+   */
+  void* release_for_freeing() {
+    set_user_bytes(freeBlockPad);
+    return release();
+  }
+
+  /**
+   * Dis-associate the helper from the original base address.
+   *
+   * @return the original base pointer used to wrap the data.
+   */
+  void* release() {
+    void* p = (void*) _base_addr;
+    _base_addr = NULL;
+    return p;
+  }
+
+  virtual void print_on(outputStream* st) const;
+
+ protected:
+  GuardHeader*  get_head_guard() const { return (GuardHeader*) _base_addr; }
+  Guard*        get_tail_guard() const { return (Guard*) (get_user_ptr() + get_user_size()); };
+  void set_user_bytes(u_char ch) {
+    memset(get_user_ptr(), ch, get_user_size());
+  }
+
+public:
+  /**
+   * Return the total size required for wrapping the given user size.
+   *
+   * @return the total size required for wrapping the given user size.
+   */
+  static size_t get_total_size(size_t user_size) {
+    size_t total_size = sizeof(GuardHeader) + user_size + sizeof(Guard);
+    assert(total_size > user_size, "Unexpected wrap-around");
+    return total_size;
+  }
+
+  // Helper functions...
+
+  /**
+   * Wrap a copy of size "len" of "p".
+   *
+   * @param p   the memory to be copied
+   * @param len the length of the copy
+   * @param tag optional general purpose tag (see GuardedMemory::get_tag())
+   *
+   * @return guarded wrapped memory pointer to the user area, or NULL if OOM.
+   */
+  static void* wrap_copy(const void* p, const size_t len, const void* tag = NULL);
+
+  /**
+   * Free wrapped copy.
+   *
+   * Frees memory copied with "wrap_copy()".
+   *
+   * @param p memory returned by "wrap_copy()".
+   *
+   * @return true if guards were verified as intact. false indicates a buffer overrun.
+   */
+  static bool free_copy(void* p);
+
+  // Testing...
+#ifndef PRODUCT
+  static void test_guarded_memory(void);
+#endif
+}; // GuardedMemory
+
+#endif // SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/iterator.cpp
--- a/src/share/vm/memory/iterator.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/iterator.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -27,6 +27,7 @@
 #include "oops/oop.inline.hpp"
 
 void KlassToOopClosure::do_klass(Klass* k) {
+  assert(_oop_closure != NULL, "Not initialized?");
   k->oops_do(_oop_closure);
 }
 
@@ -34,6 +35,10 @@
   cld->oops_do(_oop_closure, &_klass_closure, _must_claim_cld);
 }
 
+void CLDToKlassAndOopClosure::do_cld(ClassLoaderData* cld) {
+  cld->oops_do(_oop_closure, _klass_closure, _must_claim_cld);
+}
+
 void ObjectToOopClosure::do_object(oop obj) {
   obj->oop_iterate(_cl);
 }
@@ -42,6 +47,20 @@
   ShouldNotCallThis();
 }
 
+void CodeBlobToOopClosure::do_nmethod(nmethod* nm) {
+  nm->oops_do(_cl);
+  if (_fix_relocations) {
+    nm->fix_oop_relocations();
+  }
+}
+
+void CodeBlobToOopClosure::do_code_blob(CodeBlob* cb) {
+  nmethod* nm = cb->as_nmethod_or_null();
+  if (nm != NULL) {
+    do_nmethod(nm);
+  }
+}
+
 MarkingCodeBlobClosure::MarkScope::MarkScope(bool activate)
   : _active(activate)
 {
@@ -54,32 +73,7 @@
 
 void MarkingCodeBlobClosure::do_code_blob(CodeBlob* cb) {
   nmethod* nm = cb->as_nmethod_or_null();
-  if (nm == NULL)  return;
-  if (!nm->test_set_oops_do_mark()) {
-    NOT_PRODUCT(if (TraceScavenge)  nm->print_on(tty, "oops_do, 1st visit\n"));
-    do_newly_marked_nmethod(nm);
-  } else {
-    NOT_PRODUCT(if (TraceScavenge)  nm->print_on(tty, "oops_do, skipped on 2nd visit\n"));
+  if (nm != NULL && !nm->test_set_oops_do_mark()) {
+    do_nmethod(nm);
   }
 }
-
-void CodeBlobToOopClosure::do_newly_marked_nmethod(nmethod* nm) {
-  nm->oops_do(_cl, /*allow_zombie=*/ false);
-}
-
-void CodeBlobToOopClosure::do_code_blob(CodeBlob* cb) {
-  if (!_do_marking) {
-    nmethod* nm = cb->as_nmethod_or_null();
-    NOT_PRODUCT(if (TraceScavenge && Verbose && nm != NULL)  nm->print_on(tty, "oops_do, unmarked visit\n"));
-    // This assert won't work, since there are lots of mini-passes
-    // (mostly in debug mode) that co-exist with marking phases.
-    //assert(!(cb->is_nmethod() && ((nmethod*)cb)->test_oops_do_mark()), "found marked nmethod during mark-free phase");
-    if (nm != NULL) {
-      nm->oops_do(_cl);
-    }
-  } else {
-    MarkingCodeBlobClosure::do_code_blob(cb);
-  }
-}
-
-
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/iterator.hpp
--- a/src/share/vm/memory/iterator.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/iterator.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -84,8 +84,8 @@
   //
   // Providing default implementations of the _nv functions unfortunately
   // removes the compile-time safeness, but reduces the clutter for the
-  // ExtendedOopClosures that don't need to walk the metadata. Currently,
-  // only CMS needs these.
+  // ExtendedOopClosures that don't need to walk the metadata.
+  // Currently, only CMS and G1 need these.
 
   virtual bool do_metadata() { return do_metadata_nv(); }
   bool do_metadata_v()       { return do_metadata(); }
@@ -128,17 +128,33 @@
   virtual void do_klass(Klass* k) = 0;
 };
 
+class CLDClosure : public Closure {
+ public:
+  virtual void do_cld(ClassLoaderData* cld) = 0;
+};
+
 class KlassToOopClosure : public KlassClosure {
+  friend class MetadataAwareOopClosure;
+  friend class MetadataAwareOopsInGenClosure;
+
   OopClosure* _oop_closure;
+
+  // Used when _oop_closure couldn't be set in an initialization list.
+  void initialize(OopClosure* oop_closure) {
+    assert(_oop_closure == NULL, "Should only be called once");
+    _oop_closure = oop_closure;
+  }
+
  public:
-  KlassToOopClosure(OopClosure* oop_closure) : _oop_closure(oop_closure) {}
+  KlassToOopClosure(OopClosure* oop_closure = NULL) : _oop_closure(oop_closure) {}
+
   virtual void do_klass(Klass* k);
 };
 
-class CLDToOopClosure {
-  OopClosure* _oop_closure;
+class CLDToOopClosure : public CLDClosure {
+  OopClosure*       _oop_closure;
   KlassToOopClosure _klass_closure;
-  bool _must_claim_cld;
+  bool              _must_claim_cld;
 
  public:
   CLDToOopClosure(OopClosure* oop_closure, bool must_claim_cld = true) :
@@ -149,6 +165,46 @@
   void do_cld(ClassLoaderData* cld);
 };
 
+class CLDToKlassAndOopClosure : public CLDClosure {
+  friend class SharedHeap;
+  friend class G1CollectedHeap;
+ protected:
+  OopClosure*   _oop_closure;
+  KlassClosure* _klass_closure;
+  bool          _must_claim_cld;
+ public:
+  CLDToKlassAndOopClosure(KlassClosure* klass_closure,
+                          OopClosure* oop_closure,
+                          bool must_claim_cld) :
+                              _oop_closure(oop_closure),
+                              _klass_closure(klass_closure),
+                              _must_claim_cld(must_claim_cld) {}
+  void do_cld(ClassLoaderData* cld);
+};
+
+// The base class for all concurrent marking closures
+// that participate in class unloading.
+// It's used to proxy through the metadata to the oops defined in them.
+class MetadataAwareOopClosure: public ExtendedOopClosure {
+  KlassToOopClosure _klass_closure;
+
+ public:
+  MetadataAwareOopClosure() : ExtendedOopClosure() {
+    _klass_closure.initialize(this);
+  }
+  MetadataAwareOopClosure(ReferenceProcessor* rp) : ExtendedOopClosure(rp) {
+    _klass_closure.initialize(this);
+  }
+
+  virtual bool do_metadata()    { return do_metadata_nv(); }
+  inline  bool do_metadata_nv() { return true; }
+
+  virtual void do_klass(Klass* k);
+  void do_klass_nv(Klass* k);
+
+  virtual void do_class_loader_data(ClassLoaderData* cld);
+};
+
 // ObjectClosure is used for iterating through an object space
 
 class ObjectClosure : public Closure {
@@ -172,19 +228,6 @@
   ObjectToOopClosure(ExtendedOopClosure* cl) : _cl(cl) {}
 };
 
-// A version of ObjectClosure with "memory" (see _previous_address below)
-class UpwardsObjectClosure: public BoolObjectClosure {
-  HeapWord* _previous_address;
- public:
-  UpwardsObjectClosure() : _previous_address(NULL) { }
-  void set_previous(HeapWord* addr) { _previous_address = addr; }
-  HeapWord* previous()              { return _previous_address; }
-  // A return value of "true" can be used by the caller to decide
-  // if this object's end should *NOT* be recorded in
-  // _previous_address above.
-  virtual bool do_object_bm(oop obj, MemRegion mr) = 0;
-};
-
 // A version of ObjectClosure that is expected to be robust
 // in the face of possibly uninitialized objects.
 class ObjectClosureCareful : public ObjectClosure {
@@ -240,14 +283,26 @@
   virtual void do_code_blob(CodeBlob* cb) = 0;
 };
 
-
-class MarkingCodeBlobClosure : public CodeBlobClosure {
+// Applies an oop closure to all ref fields in code blobs
+// iterated over in an object iteration.
+class CodeBlobToOopClosure : public CodeBlobClosure {
+  OopClosure* _cl;
+  bool _fix_relocations;
+ protected:
+  void do_nmethod(nmethod* nm);
  public:
+  CodeBlobToOopClosure(OopClosure* cl, bool fix_relocations) : _cl(cl), _fix_relocations(fix_relocations) {}
+  virtual void do_code_blob(CodeBlob* cb);
+
+  const static bool FixRelocations = true;
+};
+
+class MarkingCodeBlobClosure : public CodeBlobToOopClosure {
+ public:
+  MarkingCodeBlobClosure(OopClosure* cl, bool fix_relocations) : CodeBlobToOopClosure(cl, fix_relocations) {}
   // Called for each code blob, but at most once per unique blob.
-  virtual void do_newly_marked_nmethod(nmethod* nm) = 0;
 
   virtual void do_code_blob(CodeBlob* cb);
-    // = { if (!nmethod(cb)->test_set_oops_do_mark())  do_newly_marked_nmethod(cb); }
 
   class MarkScope : public StackObj {
   protected:
@@ -260,23 +315,6 @@
   };
 };
 
-
-// Applies an oop closure to all ref fields in code blobs
-// iterated over in an object iteration.
-class CodeBlobToOopClosure: public MarkingCodeBlobClosure {
-  OopClosure* _cl;
-  bool _do_marking;
-public:
-  virtual void do_newly_marked_nmethod(nmethod* cb);
-    // = { cb->oops_do(_cl); }
-  virtual void do_code_blob(CodeBlob* cb);
-    // = { if (_do_marking)  super::do_code_blob(cb); else cb->oops_do(_cl); }
-  CodeBlobToOopClosure(OopClosure* cl, bool do_marking)
-    : _cl(cl), _do_marking(do_marking) {}
-};
-
-
-
 // MonitorClosure is used for iterating over monitors in the monitors cache
 
 class ObjectMonitor;
@@ -345,4 +383,16 @@
   }
 };
 
+
+// Helper defines for ExtendedOopClosure
+
+#define if_do_metadata_checked(closure, nv_suffix)       \
+  /* Make sure the non-virtual and the virtual versions match. */     \
+  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
+      "Inconsistency in do_metadata");                                \
+  if (closure->do_metadata##nv_suffix())
+
+#define assert_should_ignore_metadata(closure, nv_suffix)                                  \
+  assert(!closure->do_metadata##nv_suffix(), "Code to handle metadata is not implemented")
+
 #endif // SHARE_VM_MEMORY_ITERATOR_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/iterator.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/iterator.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_ITERATOR_INLINE_HPP
+#define SHARE_VM_MEMORY_ITERATOR_INLINE_HPP
+
+#include "classfile/classLoaderData.hpp"
+#include "memory/iterator.hpp"
+#include "oops/klass.hpp"
+#include "utilities/debug.hpp"
+
+inline void MetadataAwareOopClosure::do_class_loader_data(ClassLoaderData* cld) {
+  assert(_klass_closure._oop_closure == this, "Must be");
+
+  bool claim = true;  // Must claim the class loader data before processing.
+  cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
+}
+
+inline void MetadataAwareOopClosure::do_klass_nv(Klass* k) {
+  ClassLoaderData* cld = k->class_loader_data();
+  do_class_loader_data(cld);
+}
+
+inline void MetadataAwareOopClosure::do_klass(Klass* k)       { do_klass_nv(k); }
+
+#endif // SHARE_VM_MEMORY_ITERATOR_INLINE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/metadataFactory.hpp
--- a/src/share/vm/memory/metadataFactory.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/metadataFactory.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_MEMORY_METADATAFACTORY_HPP
 #define SHARE_VM_MEMORY_METADATAFACTORY_HPP
 
+#include "classfile/classLoaderData.hpp"
 #include "utilities/array.hpp"
 #include "utilities/exceptions.hpp"
 #include "utilities/globalDefinitions.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/metaspace.cpp
--- a/src/share/vm/memory/metaspace.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/metaspace.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -42,7 +42,7 @@
 #include "runtime/init.hpp"
 #include "runtime/java.hpp"
 #include "runtime/mutex.hpp"
-#include "runtime/orderAccess.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "services/memTracker.hpp"
 #include "services/memoryService.hpp"
 #include "utilities/copy.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/referenceProcessor.cpp
--- a/src/share/vm/memory/referenceProcessor.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/referenceProcessor.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -190,7 +190,8 @@
   OopClosure*                  keep_alive,
   VoidClosure*                 complete_gc,
   AbstractRefProcTaskExecutor* task_executor,
-  GCTimer*                     gc_timer) {
+  GCTimer*                     gc_timer,
+  GCId                         gc_id) {
   NOT_PRODUCT(verify_ok_to_handle_reflists());
 
   assert(!enqueuing_is_done(), "If here enqueuing should not be complete");
@@ -212,7 +213,7 @@
   // Soft references
   size_t soft_count = 0;
   {
-    GCTraceTime tt("SoftReference", trace_time, false, gc_timer);
+    GCTraceTime tt("SoftReference", trace_time, false, gc_timer, gc_id);
     soft_count =
       process_discovered_reflist(_discoveredSoftRefs, _current_soft_ref_policy, true,
                                  is_alive, keep_alive, complete_gc, task_executor);
@@ -223,7 +224,7 @@
   // Weak references
   size_t weak_count = 0;
   {
-    GCTraceTime tt("WeakReference", trace_time, false, gc_timer);
+    GCTraceTime tt("WeakReference", trace_time, false, gc_timer, gc_id);
     weak_count =
       process_discovered_reflist(_discoveredWeakRefs, NULL, true,
                                  is_alive, keep_alive, complete_gc, task_executor);
@@ -232,7 +233,7 @@
   // Final references
   size_t final_count = 0;
   {
-    GCTraceTime tt("FinalReference", trace_time, false, gc_timer);
+    GCTraceTime tt("FinalReference", trace_time, false, gc_timer, gc_id);
     final_count =
       process_discovered_reflist(_discoveredFinalRefs, NULL, false,
                                  is_alive, keep_alive, complete_gc, task_executor);
@@ -241,7 +242,7 @@
   // Phantom references
   size_t phantom_count = 0;
   {
-    GCTraceTime tt("PhantomReference", trace_time, false, gc_timer);
+    GCTraceTime tt("PhantomReference", trace_time, false, gc_timer, gc_id);
     phantom_count =
       process_discovered_reflist(_discoveredPhantomRefs, NULL, false,
                                  is_alive, keep_alive, complete_gc, task_executor);
@@ -253,7 +254,7 @@
   // thus use JNI weak references to circumvent the phantom references and
   // resurrect a "post-mortem" object.
   {
-    GCTraceTime tt("JNI Weak Reference", trace_time, false, gc_timer);
+    GCTraceTime tt("JNI Weak Reference", trace_time, false, gc_timer, gc_id);
     if (task_executor != NULL) {
       task_executor->set_single_threaded_mode();
     }
@@ -1251,14 +1252,15 @@
   OopClosure* keep_alive,
   VoidClosure* complete_gc,
   YieldClosure* yield,
-  GCTimer* gc_timer) {
+  GCTimer* gc_timer,
+  GCId     gc_id) {
 
   NOT_PRODUCT(verify_ok_to_handle_reflists());
 
   // Soft references
   {
     GCTraceTime tt("Preclean SoftReferences", PrintGCDetails && PrintReferenceGC,
-              false, gc_timer);
+              false, gc_timer, gc_id);
     for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
@@ -1271,7 +1273,7 @@
   // Weak references
   {
     GCTraceTime tt("Preclean WeakReferences", PrintGCDetails && PrintReferenceGC,
-              false, gc_timer);
+              false, gc_timer, gc_id);
     for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
@@ -1284,7 +1286,7 @@
   // Final references
   {
     GCTraceTime tt("Preclean FinalReferences", PrintGCDetails && PrintReferenceGC,
-              false, gc_timer);
+              false, gc_timer, gc_id);
     for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
@@ -1297,7 +1299,7 @@
   // Phantom references
   {
     GCTraceTime tt("Preclean PhantomReferences", PrintGCDetails && PrintReferenceGC,
-              false, gc_timer);
+              false, gc_timer, gc_id);
     for (uint i = 0; i < _max_num_q; i++) {
       if (yield->should_return()) {
         return;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/referenceProcessor.hpp
--- a/src/share/vm/memory/referenceProcessor.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/referenceProcessor.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_MEMORY_REFERENCEPROCESSOR_HPP
 #define SHARE_VM_MEMORY_REFERENCEPROCESSOR_HPP
 
+#include "gc_implementation/shared/gcTrace.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/referenceProcessorStats.hpp"
 #include "memory/referenceType.hpp"
@@ -349,7 +350,8 @@
                                       OopClosure*        keep_alive,
                                       VoidClosure*       complete_gc,
                                       YieldClosure*      yield,
-                                      GCTimer*           gc_timer);
+                                      GCTimer*           gc_timer,
+                                      GCId               gc_id);
 
   // Delete entries in the discovered lists that have
   // either a null referent or are not active. Such
@@ -480,7 +482,8 @@
                                 OopClosure*                  keep_alive,
                                 VoidClosure*                 complete_gc,
                                 AbstractRefProcTaskExecutor* task_executor,
-                                GCTimer *gc_timer);
+                                GCTimer *gc_timer,
+                                GCId    gc_id);
 
   // Enqueue references at end of GC (called by the garbage collector)
   bool enqueue_discovered_references(AbstractRefProcTaskExecutor* task_executor = NULL);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/sharedHeap.cpp
--- a/src/share/vm/memory/sharedHeap.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/sharedHeap.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,6 +29,7 @@
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/sharedHeap.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/fprofiler.hpp"
 #include "runtime/java.hpp"
 #include "services/management.hpp"
@@ -39,8 +40,8 @@
 
 SharedHeap* SharedHeap::_sh;
 
-// The set of potentially parallel tasks in strong root scanning.
-enum SH_process_strong_roots_tasks {
+// The set of potentially parallel tasks in root scanning.
+enum SH_process_roots_tasks {
   SH_PS_Universe_oops_do,
   SH_PS_JNIHandles_oops_do,
   SH_PS_ObjectSynchronizer_oops_do,
@@ -58,6 +59,7 @@
   CollectedHeap(),
   _collector_policy(policy_),
   _rem_set(NULL),
+  _strong_roots_scope(NULL),
   _strong_roots_parity(0),
   _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)),
   _workers(NULL)
@@ -114,6 +116,19 @@
 static AssertNonScavengableClosure assert_is_non_scavengable_closure;
 #endif
 
+SharedHeap::StrongRootsScope* SharedHeap::active_strong_roots_scope() const {
+  return _strong_roots_scope;
+}
+void SharedHeap::register_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
+  assert(_strong_roots_scope == NULL, "Should only have one StrongRootsScope active");
+  assert(scope != NULL, "Illegal argument");
+  _strong_roots_scope = scope;
+}
+void SharedHeap::unregister_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
+  assert(_strong_roots_scope == scope, "Wrong scope unregistered");
+  _strong_roots_scope = NULL;
+}
+
 void SharedHeap::change_strong_roots_parity() {
   // Also set the new collection parity.
   assert(_strong_roots_parity >= 0 && _strong_roots_parity <= 2,
@@ -124,122 +139,173 @@
          "Not in range.");
 }
 
-SharedHeap::StrongRootsScope::StrongRootsScope(SharedHeap* outer, bool activate)
-  : MarkScope(activate)
+SharedHeap::StrongRootsScope::StrongRootsScope(SharedHeap* heap, bool activate)
+  : MarkScope(activate), _sh(heap), _n_workers_done_with_threads(0)
 {
   if (_active) {
-    outer->change_strong_roots_parity();
+    _sh->register_strong_roots_scope(this);
+    _sh->change_strong_roots_parity();
     // Zero the claimed high water mark in the StringTable
     StringTable::clear_parallel_claimed_index();
   }
 }
 
 SharedHeap::StrongRootsScope::~StrongRootsScope() {
-  // nothing particular
+  if (_active) {
+    _sh->unregister_strong_roots_scope(this);
+  }
+}
+
+Monitor* SharedHeap::StrongRootsScope::_lock = new Monitor(Mutex::leaf, "StrongRootsScope lock", false);
+
+void SharedHeap::StrongRootsScope::mark_worker_done_with_threads(uint n_workers) {
+  // The Thread work barrier is only needed by G1 Class Unloading.
+  // No need to use the barrier if this is single-threaded code.
+  if (UseG1GC && ClassUnloadingWithConcurrentMark && n_workers > 0) {
+    uint new_value = (uint)Atomic::add(1, &_n_workers_done_with_threads);
+    if (new_value == n_workers) {
+      // This thread is last. Notify the others.
+      MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+      _lock->notify_all();
+    }
+  }
 }
 
-void SharedHeap::process_strong_roots(bool activate_scope,
-                                      bool is_scavenging,
-                                      ScanningOption so,
-                                      OopClosure* roots,
-                                      CodeBlobClosure* code_roots,
-                                      KlassClosure* klass_closure) {
+void SharedHeap::StrongRootsScope::wait_until_all_workers_done_with_threads(uint n_workers) {
+  assert(UseG1GC,                          "Currently only used by G1");
+  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
+
+  // No need to use the barrier if this is single-threaded code.
+  if (n_workers > 0 && (uint)_n_workers_done_with_threads != n_workers) {
+    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+    while ((uint)_n_workers_done_with_threads != n_workers) {
+      _lock->wait(Mutex::_no_safepoint_check_flag, 0, false);
+    }
+  }
+}
+
+void SharedHeap::process_roots(bool activate_scope,
+                               ScanningOption so,
+                               OopClosure* strong_roots,
+                               OopClosure* weak_roots,
+                               CLDClosure* strong_cld_closure,
+                               CLDClosure* weak_cld_closure,
+                               CodeBlobClosure* code_roots) {
   StrongRootsScope srs(this, activate_scope);
 
-  // General strong roots.
+  // General roots.
   assert(_strong_roots_parity != 0, "must have called prologue code");
+  assert(code_roots != NULL, "code root closure should always be set");
   // _n_termination for _process_strong_tasks should be set up stream
   // in a method not running in a GC worker.  Otherwise the GC worker
   // could be trying to change the termination condition while the task
   // is executing in another GC worker.
+
+  // Iterating over the CLDG and the Threads is done early to allow G1 to
+  // first process the strong CLDs and nmethods and then, after a barrier,
+  // let the thread process the weak CLDs and nmethods.
+
+  if (!_process_strong_tasks->is_task_claimed(SH_PS_ClassLoaderDataGraph_oops_do)) {
+    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
+  }
+
+  // Some CLDs contained in the thread frames should be considered strong.
+  // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
+  CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
+  // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
+  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
+
+  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
+
+  // This is the point where this worker thread will not find more strong CLDs/nmethods.
+  // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
+  active_strong_roots_scope()->mark_worker_done_with_threads(n_par_threads());
+
   if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) {
-    Universe::oops_do(roots);
+    Universe::oops_do(strong_roots);
   }
   // Global (strong) JNI handles
   if (!_process_strong_tasks->is_task_claimed(SH_PS_JNIHandles_oops_do))
-    JNIHandles::oops_do(roots);
-
-  // All threads execute this; the individual threads are task groups.
-  CLDToOopClosure roots_from_clds(roots);
-  CLDToOopClosure* roots_from_clds_p = (is_scavenging ? NULL : &roots_from_clds);
-  if (CollectedHeap::use_parallel_gc_threads()) {
-    Threads::possibly_parallel_oops_do(roots, roots_from_clds_p, code_roots);
-  } else {
-    Threads::oops_do(roots, roots_from_clds_p, code_roots);
-  }
+    JNIHandles::oops_do(strong_roots);
 
   if (!_process_strong_tasks-> is_task_claimed(SH_PS_ObjectSynchronizer_oops_do))
-    ObjectSynchronizer::oops_do(roots);
+    ObjectSynchronizer::oops_do(strong_roots);
   if (!_process_strong_tasks->is_task_claimed(SH_PS_FlatProfiler_oops_do))
-    FlatProfiler::oops_do(roots);
+    FlatProfiler::oops_do(strong_roots);
   if (!_process_strong_tasks->is_task_claimed(SH_PS_Management_oops_do))
-    Management::oops_do(roots);
+    Management::oops_do(strong_roots);
   if (!_process_strong_tasks->is_task_claimed(SH_PS_jvmti_oops_do))
-    JvmtiExport::oops_do(roots);
+    JvmtiExport::oops_do(strong_roots);
 
   if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) {
-    if (so & SO_AllClasses) {
-      SystemDictionary::oops_do(roots);
-    } else if (so & SO_SystemClasses) {
-      SystemDictionary::always_strong_oops_do(roots);
-    } else {
-      fatal("We should always have selected either SO_AllClasses or SO_SystemClasses");
-    }
-  }
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_ClassLoaderDataGraph_oops_do)) {
-    if (so & SO_AllClasses) {
-      ClassLoaderDataGraph::oops_do(roots, klass_closure, !is_scavenging);
-    } else if (so & SO_SystemClasses) {
-      ClassLoaderDataGraph::always_strong_oops_do(roots, klass_closure, !is_scavenging);
-    }
+    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
   }
 
   // All threads execute the following. A specific chunk of buckets
   // from the StringTable are the individual tasks.
-  if (so & SO_Strings) {
+  if (weak_roots != NULL) {
     if (CollectedHeap::use_parallel_gc_threads()) {
-      StringTable::possibly_parallel_oops_do(roots);
+      StringTable::possibly_parallel_oops_do(weak_roots);
     } else {
-      StringTable::oops_do(roots);
+      StringTable::oops_do(weak_roots);
     }
   }
 
   if (!_process_strong_tasks->is_task_claimed(SH_PS_CodeCache_oops_do)) {
-    if (so & SO_CodeCache) {
+    if (so & SO_ScavengeCodeCache) {
       assert(code_roots != NULL, "must supply closure for code cache");
 
-      if (is_scavenging) {
-        // We only visit parts of the CodeCache when scavenging.
-        CodeCache::scavenge_root_nmethods_do(code_roots);
-      } else {
-        // CMSCollector uses this to do intermediate-strength collections.
-        // We scan the entire code cache, since CodeCache::do_unloading is not called.
-        CodeCache::blobs_do(code_roots);
-      }
+      // We only visit parts of the CodeCache when scavenging.
+      CodeCache::scavenge_root_nmethods_do(code_roots);
+    }
+    if (so & SO_AllCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+
+      // CMSCollector uses this to do intermediate-strength collections.
+      // We scan the entire code cache, since CodeCache::do_unloading is not called.
+      CodeCache::blobs_do(code_roots);
     }
     // Verify that the code cache contents are not subject to
     // movement by a scavenging collection.
-    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, /*do_marking=*/ false));
+    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
     DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
   }
 
   _process_strong_tasks->all_tasks_completed();
 }
 
+void SharedHeap::process_all_roots(bool activate_scope,
+                                   ScanningOption so,
+                                   OopClosure* roots,
+                                   CLDClosure* cld_closure,
+                                   CodeBlobClosure* code_closure) {
+  process_roots(activate_scope, so,
+                roots, roots,
+                cld_closure, cld_closure,
+                code_closure);
+}
+
+void SharedHeap::process_strong_roots(bool activate_scope,
+                                      ScanningOption so,
+                                      OopClosure* roots,
+                                      CLDClosure* cld_closure,
+                                      CodeBlobClosure* code_closure) {
+  process_roots(activate_scope, so,
+                roots, NULL,
+                cld_closure, NULL,
+                code_closure);
+}
+
+
 class AlwaysTrueClosure: public BoolObjectClosure {
 public:
   bool do_object_b(oop p) { return true; }
 };
 static AlwaysTrueClosure always_true;
 
-void SharedHeap::process_weak_roots(OopClosure* root_closure,
-                                    CodeBlobClosure* code_roots) {
+void SharedHeap::process_weak_roots(OopClosure* root_closure) {
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, root_closure);
-
-  CodeCache::blobs_do(code_roots);
-  StringTable::oops_do(root_closure);
 }
 
 void SharedHeap::set_barrier_set(BarrierSet* bs) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/sharedHeap.hpp
--- a/src/share/vm/memory/sharedHeap.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/sharedHeap.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -69,14 +69,10 @@
 //    number of active GC workers.  CompactibleFreeListSpace and Space
 //    have SequentialSubTasksDone's.
 // Example of using SubTasksDone and SequentialSubTasksDone
-// G1CollectedHeap::g1_process_strong_roots() calls
-//  process_strong_roots(false, // no scoping; this is parallel code
-//                       is_scavenging, so,
-//                       &buf_scan_non_heap_roots,
-//                       &eager_scan_code_roots);
-//  which delegates to SharedHeap::process_strong_roots() and uses
+// G1CollectedHeap::g1_process_roots() delegates
+//  to SharedHeap::process_roots() and uses
 //  SubTasksDone* _process_strong_tasks to claim tasks.
-//  process_strong_roots() calls
+//  process_roots() calls
 //      rem_set()->younger_refs_iterate()
 //  to scan the card table and which eventually calls down into
 //  CardTableModRefBS::par_non_clean_card_iterate_work().  This method
@@ -163,9 +159,6 @@
   // Iteration functions.
   void oop_iterate(ExtendedOopClosure* cl) = 0;
 
-  // Same as above, restricted to a memory region.
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl) = 0;
-
   // Iterate over all spaces in use in the heap, in an undefined order.
   virtual void space_iterate(SpaceClosure* cl) = 0;
 
@@ -185,12 +178,12 @@
   // task.  (This also means that a parallel thread may only call
   // process_strong_roots once.)
   //
-  // For calls to process_strong_roots by sequential code, the parity is
+  // For calls to process_roots by sequential code, the parity is
   // updated automatically.
   //
   // The idea is that objects representing fine-grained tasks, such as
   // threads, will contain a "parity" field.  A task will is claimed in the
-  // current "process_strong_roots" call only if its parity field is the
+  // current "process_roots" call only if its parity field is the
   // same as the "strong_roots_parity"; task claiming is accomplished by
   // updating the parity field to the strong_roots_parity with a CAS.
   //
@@ -201,27 +194,45 @@
   //   c) to never return a distinguished value (zero) with which such
   //      task-claiming variables may be initialized, to indicate "never
   //      claimed".
- private:
-  void change_strong_roots_parity();
  public:
   int strong_roots_parity() { return _strong_roots_parity; }
 
-  // Call these in sequential code around process_strong_roots.
+  // Call these in sequential code around process_roots.
   // strong_roots_prologue calls change_strong_roots_parity, if
   // parallel tasks are enabled.
   class StrongRootsScope : public MarkingCodeBlobClosure::MarkScope {
-  public:
-    StrongRootsScope(SharedHeap* outer, bool activate = true);
+    // Used to implement the Thread work barrier.
+    static Monitor* _lock;
+
+    SharedHeap*   _sh;
+    volatile jint _n_workers_done_with_threads;
+
+   public:
+    StrongRootsScope(SharedHeap* heap, bool activate = true);
     ~StrongRootsScope();
+
+    // Mark that this thread is done with the Threads work.
+    void mark_worker_done_with_threads(uint n_workers);
+    // Wait until all n_workers are done with the Threads work.
+    void wait_until_all_workers_done_with_threads(uint n_workers);
   };
   friend class StrongRootsScope;
 
+  // The currently active StrongRootsScope
+  StrongRootsScope* _strong_roots_scope;
+
+  StrongRootsScope* active_strong_roots_scope() const;
+
+ private:
+  void register_strong_roots_scope(StrongRootsScope* scope);
+  void unregister_strong_roots_scope(StrongRootsScope* scope);
+  void change_strong_roots_parity();
+
+ public:
   enum ScanningOption {
-    SO_None                = 0x0,
-    SO_AllClasses          = 0x1,
-    SO_SystemClasses       = 0x2,
-    SO_Strings             = 0x4,
-    SO_CodeCache           = 0x8
+    SO_None                =  0x0,
+    SO_AllCodeCache        =  0x8,
+    SO_ScavengeCodeCache   = 0x10
   };
 
   FlexibleWorkGang* workers() const { return _workers; }
@@ -229,22 +240,29 @@
   // Invoke the "do_oop" method the closure "roots" on all root locations.
   // The "so" argument determines which roots the closure is applied to:
   // "SO_None" does none;
-  // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
-  // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Strings" applies the closure to all entries in StringTable;
-  // "SO_CodeCache" applies the closure to all elements of the CodeCache.
+  // "SO_AllCodeCache" applies the closure to all elements of the CodeCache.
+  // "SO_ScavengeCodeCache" applies the closure to elements on the scavenge root list in the CodeCache.
+  void process_roots(bool activate_scope,
+                     ScanningOption so,
+                     OopClosure* strong_roots,
+                     OopClosure* weak_roots,
+                     CLDClosure* strong_cld_closure,
+                     CLDClosure* weak_cld_closure,
+                     CodeBlobClosure* code_roots);
+  void process_all_roots(bool activate_scope,
+                         ScanningOption so,
+                         OopClosure* roots,
+                         CLDClosure* cld_closure,
+                         CodeBlobClosure* code_roots);
   void process_strong_roots(bool activate_scope,
-                            bool is_scavenging,
                             ScanningOption so,
                             OopClosure* roots,
-                            CodeBlobClosure* code_roots,
-                            KlassClosure* klass_closure);
+                            CLDClosure* cld_closure,
+                            CodeBlobClosure* code_roots);
 
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table.
-  void process_weak_roots(OopClosure* root_closure,
-                          CodeBlobClosure* code_roots);
+
+  // Apply "root_closure" to the JNI weak roots.
+  void process_weak_roots(OopClosure* root_closure);
 
   // The functions below are helper functions that a subclass of
   // "SharedHeap" can use in the implementation of its virtual
@@ -257,7 +275,7 @@
   virtual void gc_epilogue(bool full) = 0;
 
   // Sets the number of parallel threads that will be doing tasks
-  // (such as process strong roots) subsequently.
+  // (such as process roots) subsequently.
   virtual void set_par_threads(uint t);
 
   int n_termination();
@@ -274,4 +292,8 @@
                              size_t capacity);
 };
 
+inline SharedHeap::ScanningOption operator|(SharedHeap::ScanningOption so0, SharedHeap::ScanningOption so1) {
+  return static_cast<SharedHeap::ScanningOption>(static_cast<int>(so0) | static_cast<int>(so1));
+}
+
 #endif // SHARE_VM_MEMORY_SHAREDHEAP_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/space.cpp
--- a/src/share/vm/memory/space.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/space.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "gc_implementation/shared/liveRange.hpp"
 #include "gc_implementation/shared/markSweep.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
 #include "memory/defNewGeneration.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -37,14 +38,13 @@
 #include "oops/oop.inline.hpp"
 #include "oops/oop.inline2.hpp"
 #include "runtime/java.hpp"
+#include "runtime/prefetch.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/safepoint.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/macros.hpp"
 
-void SpaceMemRegionOopsIterClosure::do_oop(oop* p)       { SpaceMemRegionOopsIterClosure::do_oop_work(p); }
-void SpaceMemRegionOopsIterClosure::do_oop(narrowOop* p) { SpaceMemRegionOopsIterClosure::do_oop_work(p); }
-
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
 HeapWord* DirtyCardToOopClosure::get_actual_top(HeapWord* top,
@@ -307,10 +307,6 @@
   CompactibleSpace::clear(mangle_space);
 }
 
-bool ContiguousSpace::is_in(const void* p) const {
-  return _bottom <= p && p < _top;
-}
-
 bool ContiguousSpace::is_free_block(const HeapWord* p) const {
   return p >= _top;
 }
@@ -552,115 +548,11 @@
   object_iterate(&blk2);
 }
 
-HeapWord* Space::object_iterate_careful(ObjectClosureCareful* cl) {
-  guarantee(false, "NYI");
-  return bottom();
-}
-
-HeapWord* Space::object_iterate_careful_m(MemRegion mr,
-                                          ObjectClosureCareful* cl) {
-  guarantee(false, "NYI");
-  return bottom();
-}
-
-
-void Space::object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl) {
-  assert(!mr.is_empty(), "Should be non-empty");
-  // We use MemRegion(bottom(), end()) rather than used_region() below
-  // because the two are not necessarily equal for some kinds of
-  // spaces, in particular, certain kinds of free list spaces.
-  // We could use the more complicated but more precise:
-  // MemRegion(used_region().start(), round_to(used_region().end(), CardSize))
-  // but the slight imprecision seems acceptable in the assertion check.
-  assert(MemRegion(bottom(), end()).contains(mr),
-         "Should be within used space");
-  HeapWord* prev = cl->previous();   // max address from last time
-  if (prev >= mr.end()) { // nothing to do
-    return;
-  }
-  // This assert will not work when we go from cms space to perm
-  // space, and use same closure. Easy fix deferred for later. XXX YSR
-  // assert(prev == NULL || contains(prev), "Should be within space");
-
-  bool last_was_obj_array = false;
-  HeapWord *blk_start_addr, *region_start_addr;
-  if (prev > mr.start()) {
-    region_start_addr = prev;
-    blk_start_addr    = prev;
-    // The previous invocation may have pushed "prev" beyond the
-    // last allocated block yet there may be still be blocks
-    // in this region due to a particular coalescing policy.
-    // Relax the assertion so that the case where the unallocated
-    // block is maintained and "prev" is beyond the unallocated
-    // block does not cause the assertion to fire.
-    assert((BlockOffsetArrayUseUnallocatedBlock &&
-            (!is_in(prev))) ||
-           (blk_start_addr == block_start(region_start_addr)), "invariant");
-  } else {
-    region_start_addr = mr.start();
-    blk_start_addr    = block_start(region_start_addr);
-  }
-  HeapWord* region_end_addr = mr.end();
-  MemRegion derived_mr(region_start_addr, region_end_addr);
-  while (blk_start_addr < region_end_addr) {
-    const size_t size = block_size(blk_start_addr);
-    if (block_is_obj(blk_start_addr)) {
-      last_was_obj_array = cl->do_object_bm(oop(blk_start_addr), derived_mr);
-    } else {
-      last_was_obj_array = false;
-    }
-    blk_start_addr += size;
-  }
-  if (!last_was_obj_array) {
-    assert((bottom() <= blk_start_addr) && (blk_start_addr <= end()),
-           "Should be within (closed) used space");
-    assert(blk_start_addr > prev, "Invariant");
-    cl->set_previous(blk_start_addr); // min address for next time
-  }
-}
-
 bool Space::obj_is_alive(const HeapWord* p) const {
   assert (block_is_obj(p), "The address should point to an object");
   return true;
 }
 
-void ContiguousSpace::object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl) {
-  assert(!mr.is_empty(), "Should be non-empty");
-  assert(used_region().contains(mr), "Should be within used space");
-  HeapWord* prev = cl->previous();   // max address from last time
-  if (prev >= mr.end()) { // nothing to do
-    return;
-  }
-  // See comment above (in more general method above) in case you
-  // happen to use this method.
-  assert(prev == NULL || is_in_reserved(prev), "Should be within space");
-
-  bool last_was_obj_array = false;
-  HeapWord *obj_start_addr, *region_start_addr;
-  if (prev > mr.start()) {
-    region_start_addr = prev;
-    obj_start_addr    = prev;
-    assert(obj_start_addr == block_start(region_start_addr), "invariant");
-  } else {
-    region_start_addr = mr.start();
-    obj_start_addr    = block_start(region_start_addr);
-  }
-  HeapWord* region_end_addr = mr.end();
-  MemRegion derived_mr(region_start_addr, region_end_addr);
-  while (obj_start_addr < region_end_addr) {
-    oop obj = oop(obj_start_addr);
-    const size_t size = obj->size();
-    last_was_obj_array = cl->do_object_bm(obj, derived_mr);
-    obj_start_addr += size;
-  }
-  if (!last_was_obj_array) {
-    assert((bottom() <= obj_start_addr)  && (obj_start_addr <= end()),
-           "Should be within (closed) used space");
-    assert(obj_start_addr > prev, "Invariant");
-    cl->set_previous(obj_start_addr); // min address for next time
-  }
-}
-
 #if INCLUDE_ALL_GCS
 #define ContigSpace_PAR_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)         \
                                                                             \
@@ -688,43 +580,6 @@
   }
 }
 
-void ContiguousSpace::oop_iterate(MemRegion mr, ExtendedOopClosure* blk) {
-  if (is_empty()) {
-    return;
-  }
-  MemRegion cur = MemRegion(bottom(), top());
-  mr = mr.intersection(cur);
-  if (mr.is_empty()) {
-    return;
-  }
-  if (mr.equals(cur)) {
-    oop_iterate(blk);
-    return;
-  }
-  assert(mr.end() <= top(), "just took an intersection above");
-  HeapWord* obj_addr = block_start(mr.start());
-  HeapWord* t = mr.end();
-
-  // Handle first object specially.
-  oop obj = oop(obj_addr);
-  SpaceMemRegionOopsIterClosure smr_blk(blk, mr);
-  obj_addr += obj->oop_iterate(&smr_blk);
-  while (obj_addr < t) {
-    oop obj = oop(obj_addr);
-    assert(obj->is_oop(), "expected an oop");
-    obj_addr += obj->size();
-    // If "obj_addr" is not greater than top, then the
-    // entire object "obj" is within the region.
-    if (obj_addr <= t) {
-      obj->oop_iterate(blk);
-    } else {
-      // "obj" extends beyond end of region
-      obj->oop_iterate(&smr_blk);
-      break;
-    }
-  };
-}
-
 void ContiguousSpace::object_iterate(ObjectClosure* blk) {
   if (is_empty()) return;
   WaterMark bm = bottom_mark();
@@ -830,14 +685,8 @@
 // This version requires locking.
 inline HeapWord* ContiguousSpace::allocate_impl(size_t size,
                                                 HeapWord* const end_value) {
-  // In G1 there are places where a GC worker can allocates into a
-  // region using this serial allocation code without being prone to a
-  // race with other GC workers (we ensure that no other GC worker can
-  // access the same region at the same time). So the assert below is
-  // too strong in the case of G1.
   assert(Heap_lock->owned_by_self() ||
-         (SafepointSynchronize::is_at_safepoint() &&
-                               (Thread::current()->is_VM_thread() || UseG1GC)),
+         (SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread()),
          "not locked");
   HeapWord* obj = top();
   if (pointer_delta(end_value, obj) >= size) {
@@ -871,6 +720,27 @@
   } while (true);
 }
 
+HeapWord* ContiguousSpace::allocate_aligned(size_t size) {
+  assert(Heap_lock->owned_by_self() || (SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread()), "not locked");
+  HeapWord* end_value = end();
+
+  HeapWord* obj = CollectedHeap::align_allocation_or_fail(top(), end_value, SurvivorAlignmentInBytes);
+  if (obj == NULL) {
+    return NULL;
+  }
+
+  if (pointer_delta(end_value, obj) >= size) {
+    HeapWord* new_top = obj + size;
+    set_top(new_top);
+    assert(is_ptr_aligned(obj, SurvivorAlignmentInBytes) && is_aligned(new_top),
+      "checking alignment");
+    return obj;
+  } else {
+    set_top(obj);
+    return NULL;
+  }
+}
+
 // Requires locking.
 HeapWord* ContiguousSpace::allocate(size_t size) {
   return allocate_impl(size, end());
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/space.hpp
--- a/src/share/vm/memory/space.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/space.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,24 +33,8 @@
 #include "memory/watermark.hpp"
 #include "oops/markOop.hpp"
 #include "runtime/mutexLocker.hpp"
-#include "runtime/prefetch.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/workgroup.hpp"
-#ifdef TARGET_OS_FAMILY_linux
-# include "os_linux.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-# include "os_solaris.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "os_windows.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_aix
-# include "os_aix.inline.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "os_bsd.inline.hpp"
-#endif
 
 // A space is an abstraction for the "storage units" backing
 // up the generation abstraction. It includes specific
@@ -81,31 +65,6 @@
 class CardTableRS;
 class DirtyCardToOopClosure;
 
-// An oop closure that is circumscribed by a filtering memory region.
-class SpaceMemRegionOopsIterClosure: public ExtendedOopClosure {
- private:
-  ExtendedOopClosure* _cl;
-  MemRegion   _mr;
- protected:
-  template <class T> void do_oop_work(T* p) {
-    if (_mr.contains(p)) {
-      _cl->do_oop(p);
-    }
-  }
- public:
-  SpaceMemRegionOopsIterClosure(ExtendedOopClosure* cl, MemRegion mr):
-    _cl(cl), _mr(mr) {}
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p);
-  virtual bool do_metadata() {
-    // _cl is of type ExtendedOopClosure instead of OopClosure, so that we can check this.
-    assert(!_cl->do_metadata(), "I've checked all call paths, this shouldn't happen.");
-    return false;
-  }
-  virtual void do_klass(Klass* k)                         { ShouldNotReachHere(); }
-  virtual void do_class_loader_data(ClassLoaderData* cld) { ShouldNotReachHere(); }
-};
-
 // A Space describes a heap area. Class Space is an abstract
 // base class.
 //
@@ -145,6 +104,12 @@
 
   void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; }
 
+  // Returns true if this object has been allocated since a
+  // generation's "save_marks" call.
+  virtual bool obj_allocated_since_save_marks(const oop obj) const {
+    return (HeapWord*)obj >= saved_mark_word();
+  }
+
   MemRegionClosure* preconsumptionDirtyCardClosure() const {
     return _preconsumptionDirtyCardClosure;
   }
@@ -152,9 +117,9 @@
     _preconsumptionDirtyCardClosure = cl;
   }
 
-  // Returns a subregion of the space containing all the objects in
+  // Returns a subregion of the space containing only the allocated objects in
   // the space.
-  virtual MemRegion used_region() const { return MemRegion(bottom(), end()); }
+  virtual MemRegion used_region() const = 0;
 
   // Returns a region that is guaranteed to contain (at least) all objects
   // allocated at the time of the last call to "save_marks".  If the space
@@ -164,7 +129,7 @@
   // saved mark.  Otherwise, the "obj_allocated_since_save_marks" method of
   // the space must distiguish between objects in the region allocated before
   // and after the call to save marks.
-  virtual MemRegion used_region_at_save_marks() const {
+  MemRegion used_region_at_save_marks() const {
     return MemRegion(bottom(), saved_mark_word());
   }
 
@@ -197,7 +162,9 @@
   // expensive operation. To prevent performance problems
   // on account of its inadvertent use in product jvm's,
   // we restrict its use to assertion checks only.
-  virtual bool is_in(const void* p) const = 0;
+  bool is_in(const void* p) const {
+    return used_region().contains(p);
+  }
 
   // Returns true iff the given reserved memory of the space contains the
   // given address.
@@ -221,11 +188,6 @@
   // applications of the closure are not included in the iteration.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to the intersection of a memory region and
-  // the space.  Fields in objects allocated by applications of the closure
-  // are not included in the iteration.
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl) = 0;
-
   // Iterate over all objects in the space, calling "cl.do_object" on
   // each.  Objects allocated by applications of the closure are not
   // included in the iteration.
@@ -234,24 +196,6 @@
   // objects whose internal references point to objects in the space.
   virtual void safe_object_iterate(ObjectClosure* blk) = 0;
 
-  // Iterate over all objects that intersect with mr, calling "cl->do_object"
-  // on each.  There is an exception to this: if this closure has already
-  // been invoked on an object, it may skip such objects in some cases.  This is
-  // Most likely to happen in an "upwards" (ascending address) iteration of
-  // MemRegions.
-  virtual void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
-
-  // Iterate over as many initialized objects in the space as possible,
-  // calling "cl.do_object_careful" on each. Return NULL if all objects
-  // in the space (at the start of the iteration) were iterated over.
-  // Return an address indicating the extent of the iteration in the
-  // event that the iteration had to return because of finding an
-  // uninitialized object in the space, or if the closure "cl"
-  // signalled early termination.
-  virtual HeapWord* object_iterate_careful(ObjectClosureCareful* cl);
-  virtual HeapWord* object_iterate_careful_m(MemRegion mr,
-                                             ObjectClosureCareful* cl);
-
   // Create and return a new dirty card to oop closure. Can be
   // overriden to return the appropriate type of closure
   // depending on the type of space in which the closure will
@@ -292,10 +236,6 @@
   // Allocation (return NULL if full).  Enforces mutual exclusion internally.
   virtual HeapWord* par_allocate(size_t word_size) = 0;
 
-  // Returns true if this object has been allocated since a
-  // generation's "save_marks" call.
-  virtual bool obj_allocated_since_save_marks(const oop obj) const = 0;
-
   // Mark-sweep-compact support: all spaces can update pointers to objects
   // moving as a part of compaction.
   virtual void adjust_pointers();
@@ -390,9 +330,9 @@
   Generation* gen;
   CompactibleSpace* space;
   HeapWord* threshold;
-  CompactPoint(Generation* _gen, CompactibleSpace* _space,
-               HeapWord* _threshold) :
-    gen(_gen), space(_space), threshold(_threshold) {}
+
+  CompactPoint(Generation* _gen) :
+    gen(_gen), space(NULL), threshold(0) {}
 };
 
 
@@ -427,7 +367,7 @@
 
   // Perform operations on the space needed after a compaction
   // has been performed.
-  virtual void reset_after_compaction() {}
+  virtual void reset_after_compaction() = 0;
 
   // Returns the next space (in the current generation) to be compacted in
   // the global compaction order.  Also is used to select the next
@@ -492,7 +432,7 @@
   HeapWord* _end_of_live;
 
   // Minimum size of a free block.
-  virtual size_t minimum_free_block_size() const = 0;
+  virtual size_t minimum_free_block_size() const { return 0; }
 
   // This the function is invoked when an allocation of an object covering
   // "start" to "end occurs crosses the threshold; returns the next
@@ -512,272 +452,6 @@
                         size_t word_len);
 };
 
-#define SCAN_AND_FORWARD(cp,scan_limit,block_is_obj,block_size) {            \
-  /* Compute the new addresses for the live objects and store it in the mark \
-   * Used by universe::mark_sweep_phase2()                                   \
-   */                                                                        \
-  HeapWord* compact_top; /* This is where we are currently compacting to. */ \
-                                                                             \
-  /* We're sure to be here before any objects are compacted into this        \
-   * space, so this is a good time to initialize this:                       \
-   */                                                                        \
-  set_compaction_top(bottom());                                              \
-                                                                             \
-  if (cp->space == NULL) {                                                   \
-    assert(cp->gen != NULL, "need a generation");                            \
-    assert(cp->threshold == NULL, "just checking");                          \
-    assert(cp->gen->first_compaction_space() == this, "just checking");      \
-    cp->space = cp->gen->first_compaction_space();                           \
-    compact_top = cp->space->bottom();                                       \
-    cp->space->set_compaction_top(compact_top);                              \
-    cp->threshold = cp->space->initialize_threshold();                       \
-  } else {                                                                   \
-    compact_top = cp->space->compaction_top();                               \
-  }                                                                          \
-                                                                             \
-  /* We allow some amount of garbage towards the bottom of the space, so     \
-   * we don't start compacting before there is a significant gain to be made.\
-   * Occasionally, we want to ensure a full compaction, which is determined  \
-   * by the MarkSweepAlwaysCompactCount parameter.                           \
-   */                                                                        \
-  uint invocations = MarkSweep::total_invocations();                         \
-  bool skip_dead = ((invocations % MarkSweepAlwaysCompactCount) != 0);       \
-                                                                             \
-  size_t allowed_deadspace = 0;                                              \
-  if (skip_dead) {                                                           \
-    const size_t ratio = allowed_dead_ratio();                               \
-    allowed_deadspace = (capacity() * ratio / 100) / HeapWordSize;           \
-  }                                                                          \
-                                                                             \
-  HeapWord* q = bottom();                                                    \
-  HeapWord* t = scan_limit();                                                \
-                                                                             \
-  HeapWord*  end_of_live= q;    /* One byte beyond the last byte of the last \
-                                   live object. */                           \
-  HeapWord*  first_dead = end();/* The first dead object. */                 \
-  LiveRange* liveRange  = NULL; /* The current live range, recorded in the   \
-                                   first header of preceding free area. */   \
-  _first_dead = first_dead;                                                  \
-                                                                             \
-  const intx interval = PrefetchScanIntervalInBytes;                         \
-                                                                             \
-  while (q < t) {                                                            \
-    assert(!block_is_obj(q) ||                                               \
-           oop(q)->mark()->is_marked() || oop(q)->mark()->is_unlocked() ||   \
-           oop(q)->mark()->has_bias_pattern(),                               \
-           "these are the only valid states during a mark sweep");           \
-    if (block_is_obj(q) && oop(q)->is_gc_marked()) {                         \
-      /* prefetch beyond q */                                                \
-      Prefetch::write(q, interval);                                          \
-      size_t size = block_size(q);                                           \
-      compact_top = cp->space->forward(oop(q), size, cp, compact_top);       \
-      q += size;                                                             \
-      end_of_live = q;                                                       \
-    } else {                                                                 \
-      /* run over all the contiguous dead objects */                         \
-      HeapWord* end = q;                                                     \
-      do {                                                                   \
-        /* prefetch beyond end */                                            \
-        Prefetch::write(end, interval);                                      \
-        end += block_size(end);                                              \
-      } while (end < t && (!block_is_obj(end) || !oop(end)->is_gc_marked()));\
-                                                                             \
-      /* see if we might want to pretend this object is alive so that        \
-       * we don't have to compact quite as often.                            \
-       */                                                                    \
-      if (allowed_deadspace > 0 && q == compact_top) {                       \
-        size_t sz = pointer_delta(end, q);                                   \
-        if (insert_deadspace(allowed_deadspace, q, sz)) {                    \
-          compact_top = cp->space->forward(oop(q), sz, cp, compact_top);     \
-          q = end;                                                           \
-          end_of_live = end;                                                 \
-          continue;                                                          \
-        }                                                                    \
-      }                                                                      \
-                                                                             \
-      /* otherwise, it really is a free region. */                           \
-                                                                             \
-      /* for the previous LiveRange, record the end of the live objects. */  \
-      if (liveRange) {                                                       \
-        liveRange->set_end(q);                                               \
-      }                                                                      \
-                                                                             \
-      /* record the current LiveRange object.                                \
-       * liveRange->start() is overlaid on the mark word.                    \
-       */                                                                    \
-      liveRange = (LiveRange*)q;                                             \
-      liveRange->set_start(end);                                             \
-      liveRange->set_end(end);                                               \
-                                                                             \
-      /* see if this is the first dead region. */                            \
-      if (q < first_dead) {                                                  \
-        first_dead = q;                                                      \
-      }                                                                      \
-                                                                             \
-      /* move on to the next object */                                       \
-      q = end;                                                               \
-    }                                                                        \
-  }                                                                          \
-                                                                             \
-  assert(q == t, "just checking");                                           \
-  if (liveRange != NULL) {                                                   \
-    liveRange->set_end(q);                                                   \
-  }                                                                          \
-  _end_of_live = end_of_live;                                                \
-  if (end_of_live < first_dead) {                                            \
-    first_dead = end_of_live;                                                \
-  }                                                                          \
-  _first_dead = first_dead;                                                  \
-                                                                             \
-  /* save the compaction_top of the compaction space. */                     \
-  cp->space->set_compaction_top(compact_top);                                \
-}
-
-#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) {                             \
-  /* adjust all the interior pointers to point at the new locations of objects  \
-   * Used by MarkSweep::mark_sweep_phase3() */                                  \
-                                                                                \
-  HeapWord* q = bottom();                                                       \
-  HeapWord* t = _end_of_live;  /* Established by "prepare_for_compaction". */   \
-                                                                                \
-  assert(_first_dead <= _end_of_live, "Stands to reason, no?");                 \
-                                                                                \
-  if (q < t && _first_dead > q &&                                               \
-      !oop(q)->is_gc_marked()) {                                                \
-    /* we have a chunk of the space which hasn't moved and we've                \
-     * reinitialized the mark word during the previous pass, so we can't        \
-     * use is_gc_marked for the traversal. */                                   \
-    HeapWord* end = _first_dead;                                                \
-                                                                                \
-    while (q < end) {                                                           \
-      /* I originally tried to conjoin "block_start(q) == q" to the             \
-       * assertion below, but that doesn't work, because you can't              \
-       * accurately traverse previous objects to get to the current one         \
-       * after their pointers have been                                         \
-       * updated, until the actual compaction is done.  dld, 4/00 */            \
-      assert(block_is_obj(q),                                                   \
-             "should be at block boundaries, and should be looking at objs");   \
-                                                                                \
-      /* point all the oops to the new location */                              \
-      size_t size = oop(q)->adjust_pointers();                                  \
-      size = adjust_obj_size(size);                                             \
-                                                                                \
-      q += size;                                                                \
-    }                                                                           \
-                                                                                \
-    if (_first_dead == t) {                                                     \
-      q = t;                                                                    \
-    } else {                                                                    \
-      /* $$$ This is funky.  Using this to read the previously written          \
-       * LiveRange.  See also use below. */                                     \
-      q = (HeapWord*)oop(_first_dead)->mark()->decode_pointer();                \
-    }                                                                           \
-  }                                                                             \
-                                                                                \
-  const intx interval = PrefetchScanIntervalInBytes;                            \
-                                                                                \
-  debug_only(HeapWord* prev_q = NULL);                                          \
-  while (q < t) {                                                               \
-    /* prefetch beyond q */                                                     \
-    Prefetch::write(q, interval);                                               \
-    if (oop(q)->is_gc_marked()) {                                               \
-      /* q is alive */                                                          \
-      /* point all the oops to the new location */                              \
-      size_t size = oop(q)->adjust_pointers();                                  \
-      size = adjust_obj_size(size);                                             \
-      debug_only(prev_q = q);                                                   \
-      q += size;                                                                \
-    } else {                                                                    \
-      /* q is not a live object, so its mark should point at the next           \
-       * live object */                                                         \
-      debug_only(prev_q = q);                                                   \
-      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
-      assert(q > prev_q, "we should be moving forward through memory");         \
-    }                                                                           \
-  }                                                                             \
-                                                                                \
-  assert(q == t, "just checking");                                              \
-}
-
-#define SCAN_AND_COMPACT(obj_size) {                                            \
-  /* Copy all live objects to their new location                                \
-   * Used by MarkSweep::mark_sweep_phase4() */                                  \
-                                                                                \
-  HeapWord*       q = bottom();                                                 \
-  HeapWord* const t = _end_of_live;                                             \
-  debug_only(HeapWord* prev_q = NULL);                                          \
-                                                                                \
-  if (q < t && _first_dead > q &&                                               \
-      !oop(q)->is_gc_marked()) {                                                \
-    debug_only(                                                                 \
-    /* we have a chunk of the space which hasn't moved and we've reinitialized  \
-     * the mark word during the previous pass, so we can't use is_gc_marked for \
-     * the traversal. */                                                        \
-    HeapWord* const end = _first_dead;                                          \
-                                                                                \
-    while (q < end) {                                                           \
-      size_t size = obj_size(q);                                                \
-      assert(!oop(q)->is_gc_marked(),                                           \
-             "should be unmarked (special dense prefix handling)");             \
-      debug_only(prev_q = q);                                                   \
-      q += size;                                                                \
-    }                                                                           \
-    )  /* debug_only */                                                         \
-                                                                                \
-    if (_first_dead == t) {                                                     \
-      q = t;                                                                    \
-    } else {                                                                    \
-      /* $$$ Funky */                                                           \
-      q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer();               \
-    }                                                                           \
-  }                                                                             \
-                                                                                \
-  const intx scan_interval = PrefetchScanIntervalInBytes;                       \
-  const intx copy_interval = PrefetchCopyIntervalInBytes;                       \
-  while (q < t) {                                                               \
-    if (!oop(q)->is_gc_marked()) {                                              \
-      /* mark is pointer to next marked oop */                                  \
-      debug_only(prev_q = q);                                                   \
-      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
-      assert(q > prev_q, "we should be moving forward through memory");         \
-    } else {                                                                    \
-      /* prefetch beyond q */                                                   \
-      Prefetch::read(q, scan_interval);                                         \
-                                                                                \
-      /* size and destination */                                                \
-      size_t size = obj_size(q);                                                \
-      HeapWord* compaction_top = (HeapWord*)oop(q)->forwardee();                \
-                                                                                \
-      /* prefetch beyond compaction_top */                                      \
-      Prefetch::write(compaction_top, copy_interval);                           \
-                                                                                \
-      /* copy object and reinit its mark */                                     \
-      assert(q != compaction_top, "everything in this pass should be moving");  \
-      Copy::aligned_conjoint_words(q, compaction_top, size);                    \
-      oop(compaction_top)->init_mark();                                         \
-      assert(oop(compaction_top)->klass() != NULL, "should have a class");      \
-                                                                                \
-      debug_only(prev_q = q);                                                   \
-      q += size;                                                                \
-    }                                                                           \
-  }                                                                             \
-                                                                                \
-  /* Let's remember if we were empty before we did the compaction. */           \
-  bool was_empty = used_region().is_empty();                                    \
-  /* Reset space after compaction is complete */                                \
-  reset_after_compaction();                                                     \
-  /* We do this clear, below, since it has overloaded meanings for some */      \
-  /* space subtypes.  For example, OffsetTableContigSpace's that were   */      \
-  /* compacted into will have had their offset table thresholds updated */      \
-  /* continuously, but those that weren't need to have their thresholds */      \
-  /* re-initialized.  Also mangles unused area for debugging.           */      \
-  if (used_region().is_empty()) {                                               \
-    if (!was_empty) clear(SpaceDecorator::Mangle);                              \
-  } else {                                                                      \
-    if (ZapUnusedHeapArea) mangle_unused_area();                                \
-  }                                                                             \
-}
-
 class GenSpaceMangler;
 
 // A space in which the free area is contiguous.  It therefore supports
@@ -808,7 +482,7 @@
   HeapWord* top() const            { return _top;    }
   void set_top(HeapWord* value)    { _top = value; }
 
-  virtual void set_saved_mark()    { _saved_mark_word = top();    }
+  void set_saved_mark()            { _saved_mark_word = top();    }
   void reset_saved_mark()          { _saved_mark_word = bottom(); }
 
   WaterMark bottom_mark()     { return WaterMark(this, bottom()); }
@@ -843,36 +517,31 @@
   size_t used() const            { return byte_size(bottom(), top()); }
   size_t free() const            { return byte_size(top(),    end()); }
 
-  // Override from space.
-  bool is_in(const void* p) const;
-
   virtual bool is_free_block(const HeapWord* p) const;
 
   // In a contiguous space we have a more obvious bound on what parts
   // contain objects.
   MemRegion used_region() const { return MemRegion(bottom(), top()); }
 
-  MemRegion used_region_at_save_marks() const {
-    return MemRegion(bottom(), saved_mark_word());
-  }
-
   // Allocation (return NULL if full)
   virtual HeapWord* allocate(size_t word_size);
   virtual HeapWord* par_allocate(size_t word_size);
-
-  virtual bool obj_allocated_since_save_marks(const oop obj) const {
-    return (HeapWord*)obj >= saved_mark_word();
-  }
+  HeapWord* allocate_aligned(size_t word_size);
 
   // Iteration
   void oop_iterate(ExtendedOopClosure* cl);
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void object_iterate(ObjectClosure* blk);
   // For contiguous spaces this method will iterate safely over objects
   // in the space (i.e., between bottom and top) when at a safepoint.
   void safe_object_iterate(ObjectClosure* blk);
-  void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
-  // iterates on objects up to the safe limit
+
+  // Iterate over as many initialized objects in the space as possible,
+  // calling "cl.do_object_careful" on each. Return NULL if all objects
+  // in the space (at the start of the iteration) were iterated over.
+  // Return an address indicating the extent of the iteration in the
+  // event that the iteration had to return because of finding an
+  // uninitialized object in the space, or if the closure "cl"
+  // signaled early termination.
   HeapWord* object_iterate_careful(ObjectClosureCareful* cl);
   HeapWord* concurrent_iteration_safe_limit() {
     assert(_concurrent_iteration_safe_limit <= top(),
@@ -903,7 +572,6 @@
     // set new iteration safe limit
     set_concurrent_iteration_safe_limit(compaction_top());
   }
-  virtual size_t minimum_free_block_size() const { return 0; }
 
   // Override.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/memory/space.inline.hpp
--- a/src/share/vm/memory/space.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/memory/space.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,12 +28,279 @@
 #include "gc_interface/collectedHeap.hpp"
 #include "memory/space.hpp"
 #include "memory/universe.hpp"
+#include "runtime/prefetch.inline.hpp"
 #include "runtime/safepoint.hpp"
 
 inline HeapWord* Space::block_start(const void* p) {
   return block_start_const(p);
 }
 
+#define SCAN_AND_FORWARD(cp,scan_limit,block_is_obj,block_size) {            \
+  /* Compute the new addresses for the live objects and store them in the    \
+   * mark.  Used by mark_sweep_phase2().  block_is_obj(p) and block_size(p)  \
+   * take a HeapWord*; scan_limit() yields the exclusive end of the scan. */ \
+  HeapWord* compact_top; /* This is where we are currently compacting to. */ \
+                                                                             \
+  /* We're sure to be here before any objects are compacted into this        \
+   * space, so this is a good time to initialize this:                       \
+   */                                                                        \
+  set_compaction_top(bottom());                                              \
+                                                                             \
+  if (cp->space == NULL) {                                                   \
+    assert(cp->gen != NULL, "need a generation");                            \
+    assert(cp->threshold == NULL, "just checking");                          \
+    assert(cp->gen->first_compaction_space() == this, "just checking");      \
+    cp->space = cp->gen->first_compaction_space();                           \
+    compact_top = cp->space->bottom();                                       \
+    cp->space->set_compaction_top(compact_top);                              \
+    cp->threshold = cp->space->initialize_threshold();                       \
+  } else {                                                                   \
+    compact_top = cp->space->compaction_top();                               \
+  }                                                                          \
+                                                                             \
+  /* We allow some amount of garbage towards the bottom of the space, so     \
+   * we don't start compacting before there is a significant gain to be made.\
+   * Occasionally, we want to ensure a full compaction, which is determined  \
+   * by the MarkSweepAlwaysCompactCount parameter.                           \
+   */                                                                        \
+  uint invocations = MarkSweep::total_invocations();                         \
+  bool skip_dead = ((invocations % MarkSweepAlwaysCompactCount) != 0);       \
+  /* Dead-space budget; it stays 0 when a full compaction is forced. */      \
+  size_t allowed_deadspace = 0;                                              \
+  if (skip_dead) {                                                           \
+    const size_t ratio = allowed_dead_ratio();                               \
+    allowed_deadspace = (capacity() * ratio / 100) / HeapWordSize;           \
+  }                                                                          \
+                                                                             \
+  HeapWord* q = bottom();                                                    \
+  HeapWord* t = scan_limit();                                                \
+                                                                             \
+  HeapWord*  end_of_live= q;    /* One byte beyond the last byte of the last \
+                                   live object. */                           \
+  HeapWord*  first_dead = end();/* The first dead object. */                 \
+  LiveRange* liveRange  = NULL; /* The current live range, recorded in the   \
+                                   first header of preceding free area. */   \
+  _first_dead = first_dead;                                                  \
+                                                                             \
+  const intx interval = PrefetchScanIntervalInBytes;                         \
+  /* Walk blocks in [q, t): forward live objects, record dead runs. */       \
+  while (q < t) {                                                            \
+    assert(!block_is_obj(q) ||                                               \
+           oop(q)->mark()->is_marked() || oop(q)->mark()->is_unlocked() ||   \
+           oop(q)->mark()->has_bias_pattern(),                               \
+           "these are the only valid states during a mark sweep");           \
+    if (block_is_obj(q) && oop(q)->is_gc_marked()) {                         \
+      /* prefetch beyond q */                                                \
+      Prefetch::write(q, interval);                                          \
+      size_t size = block_size(q);                                           \
+      compact_top = cp->space->forward(oop(q), size, cp, compact_top);       \
+      q += size;                                                             \
+      end_of_live = q;                                                       \
+    } else {                                                                 \
+      /* run over all the contiguous dead objects */                         \
+      HeapWord* end = q;                                                     \
+      do {                                                                   \
+        /* prefetch beyond end */                                            \
+        Prefetch::write(end, interval);                                      \
+        end += block_size(end);                                              \
+      } while (end < t && (!block_is_obj(end) || !oop(end)->is_gc_marked()));\
+                                                                             \
+      /* see if we might want to pretend this object is alive so that        \
+       * we don't have to compact quite as often.                            \
+       */                                                                    \
+      if (allowed_deadspace > 0 && q == compact_top) {                       \
+        size_t sz = pointer_delta(end, q);                                   \
+        if (insert_deadspace(allowed_deadspace, q, sz)) {                    \
+          compact_top = cp->space->forward(oop(q), sz, cp, compact_top);     \
+          q = end;                                                           \
+          end_of_live = end;                                                 \
+          continue;                                                          \
+        }                                                                    \
+      }                                                                      \
+                                                                             \
+      /* otherwise, it really is a free region. */                           \
+                                                                             \
+      /* for the previous LiveRange, record the end of the live objects. */  \
+      if (liveRange) {                                                       \
+        liveRange->set_end(q);                                               \
+      }                                                                      \
+                                                                             \
+      /* record the current LiveRange object.                                \
+       * liveRange->start() is overlaid on the mark word.                    \
+       */                                                                    \
+      liveRange = (LiveRange*)q;                                             \
+      liveRange->set_start(end);                                             \
+      liveRange->set_end(end);                                               \
+                                                                             \
+      /* see if this is the first dead region. */                            \
+      if (q < first_dead) {                                                  \
+        first_dead = q;                                                      \
+      }                                                                      \
+                                                                             \
+      /* move on to the next object */                                       \
+      q = end;                                                               \
+    }                                                                        \
+  }                                                                          \
+                                                                             \
+  assert(q == t, "just checking");                                           \
+  if (liveRange != NULL) {                                                   \
+    liveRange->set_end(q);                                                   \
+  }                                                                          \
+  _end_of_live = end_of_live;                                                \
+  if (end_of_live < first_dead) {                                            \
+    first_dead = end_of_live;                                                \
+  }                                                                          \
+  _first_dead = first_dead;                                                  \
+                                                                             \
+  /* save the compaction_top of the compaction space. */                     \
+  cp->space->set_compaction_top(compact_top);                                \
+}
+
+#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) {                             \
+  /* Adjust all the interior pointers to point at the new locations of objects. \
+   * Used by MarkSweep::mark_sweep_phase3().  adjust_obj_size(size) maps the    \
+   * size returned by adjust_pointers() to the step to the next block. */       \
+  HeapWord* q = bottom();                                                       \
+  HeapWord* t = _end_of_live;  /* Established by "prepare_for_compaction". */   \
+                                                                                \
+  assert(_first_dead <= _end_of_live, "Stands to reason, no?");                 \
+                                                                                \
+  if (q < t && _first_dead > q &&                                               \
+      !oop(q)->is_gc_marked()) {                                                \
+    /* we have a chunk of the space which hasn't moved and we've                \
+     * reinitialized the mark word during the previous pass, so we can't        \
+     * use is_gc_marked for the traversal. */                                   \
+    HeapWord* end = _first_dead;                                                \
+                                                                                \
+    while (q < end) {                                                           \
+      /* I originally tried to conjoin "block_start(q) == q" to the             \
+       * assertion below, but that doesn't work, because you can't              \
+       * accurately traverse previous objects to get to the current one         \
+       * after their pointers have been updated, until the actual compaction    \
+       * is done.  dld, 4/00.  NB: block_is_obj is not a macro parameter. */    \
+      assert(block_is_obj(q),                                                   \
+             "should be at block boundaries, and should be looking at objs");   \
+                                                                                \
+      /* point all the oops to the new location */                              \
+      size_t size = oop(q)->adjust_pointers();                                  \
+      size = adjust_obj_size(size);                                             \
+                                                                                \
+      q += size;                                                                \
+    }                                                                           \
+                                                                                \
+    if (_first_dead == t) {                                                     \
+      q = t;                                                                    \
+    } else {                                                                    \
+      /* $$$ This is funky.  Using this to read the previously written          \
+       * LiveRange.  See also use below. */                                     \
+      q = (HeapWord*)oop(_first_dead)->mark()->decode_pointer();                \
+    }                                                                           \
+  }                                                                             \
+                                                                                \
+  const intx interval = PrefetchScanIntervalInBytes;                            \
+                                                                                \
+  debug_only(HeapWord* prev_q = NULL);                                          \
+  while (q < t) {                                                               \
+    /* prefetch beyond q */                                                     \
+    Prefetch::write(q, interval);                                               \
+    if (oop(q)->is_gc_marked()) {                                               \
+      /* q is alive */                                                          \
+      /* point all the oops to the new location */                              \
+      size_t size = oop(q)->adjust_pointers();                                  \
+      size = adjust_obj_size(size);                                             \
+      debug_only(prev_q = q);                                                   \
+      q += size;                                                                \
+    } else {                                                                    \
+      /* q is not a live object, so its mark should point at the next           \
+       * live object */                                                         \
+      debug_only(prev_q = q);                                                   \
+      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
+      assert(q > prev_q, "we should be moving forward through memory");         \
+    }                                                                           \
+  }                                                                             \
+                                                                                \
+  assert(q == t, "just checking");                                              \
+}
+
+#define SCAN_AND_COMPACT(obj_size) {                                            \
+  /* Copy all live objects to their new location.                               \
+   * Used by MarkSweep::mark_sweep_phase4().  obj_size(q) must return the size  \
+   * of the object at q in HeapWords (it is added to a HeapWord*). */           \
+  HeapWord*       q = bottom();                                                 \
+  HeapWord* const t = _end_of_live;                                             \
+  debug_only(HeapWord* prev_q = NULL);                                          \
+                                                                                \
+  if (q < t && _first_dead > q &&                                               \
+      !oop(q)->is_gc_marked()) {                                                \
+    debug_only(                                                                 \
+    /* we have a chunk of the space which hasn't moved and we've reinitialized  \
+     * the mark word during the previous pass, so we can't use is_gc_marked for \
+     * the traversal. */                                                        \
+    HeapWord* const end = _first_dead;                                          \
+                                                                                \
+    while (q < end) {                                                           \
+      size_t size = obj_size(q);                                                \
+      assert(!oop(q)->is_gc_marked(),                                           \
+             "should be unmarked (special dense prefix handling)");             \
+      debug_only(prev_q = q);                                                   \
+      q += size;                                                                \
+    }                                                                           \
+    )  /* debug_only */                                                         \
+                                                                                \
+    if (_first_dead == t) {                                                     \
+      q = t;                                                                    \
+    } else {                                                                    \
+      /* $$$ Funky: the mark at _first_dead encodes where to resume. */         \
+      q = (HeapWord*) oop(_first_dead)->mark()->decode_pointer();               \
+    }                                                                           \
+  }                                                                             \
+                                                                                \
+  const intx scan_interval = PrefetchScanIntervalInBytes;                       \
+  const intx copy_interval = PrefetchCopyIntervalInBytes;                       \
+  while (q < t) {                                                               \
+    if (!oop(q)->is_gc_marked()) {                                              \
+      /* mark is pointer to next marked oop */                                  \
+      debug_only(prev_q = q);                                                   \
+      q = (HeapWord*) oop(q)->mark()->decode_pointer();                         \
+      assert(q > prev_q, "we should be moving forward through memory");         \
+    } else {                                                                    \
+      /* prefetch beyond q */                                                   \
+      Prefetch::read(q, scan_interval);                                         \
+                                                                                \
+      /* size and destination */                                                \
+      size_t size = obj_size(q);                                                \
+      HeapWord* compaction_top = (HeapWord*)oop(q)->forwardee();                \
+                                                                                \
+      /* prefetch beyond compaction_top */                                      \
+      Prefetch::write(compaction_top, copy_interval);                           \
+                                                                                \
+      /* copy object and reinit its mark */                                     \
+      assert(q != compaction_top, "everything in this pass should be moving");  \
+      Copy::aligned_conjoint_words(q, compaction_top, size);                    \
+      oop(compaction_top)->init_mark();                                         \
+      assert(oop(compaction_top)->klass() != NULL, "should have a class");      \
+                                                                                \
+      debug_only(prev_q = q);                                                   \
+      q += size;                                                                \
+    }                                                                           \
+  }                                                                             \
+                                                                                \
+  /* Let's remember if we were empty before we did the compaction. */           \
+  bool was_empty = used_region().is_empty();                                    \
+  /* Reset space after compaction is complete */                                \
+  reset_after_compaction();                                                     \
+  /* We do this clear, below, since it has overloaded meanings for some */      \
+  /* space subtypes.  For example, OffsetTableContigSpace's that were   */      \
+  /* compacted into will have had their offset table thresholds updated */      \
+  /* continuously, but those that weren't need to have their thresholds */      \
+  /* re-initialized.  Also mangles unused area for debugging.           */      \
+  if (used_region().is_empty()) {                                               \
+    if (!was_empty) clear(SpaceDecorator::Mangle);                              \
+  } else {                                                                      \
+    if (ZapUnusedHeapArea) mangle_unused_area();                                \
+  }                                                                             \
+}
+
 inline HeapWord* OffsetTableContigSpace::allocate(size_t size) {
   HeapWord* res = ContiguousSpace::allocate(size);
   if (res != NULL) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/cpCache.cpp
--- a/src/share/vm/oops/cpCache.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/cpCache.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,6 +33,7 @@
 #include "prims/jvmtiRedefineClassesTrace.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
 # include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/cpCache.hpp
--- a/src/share/vm/oops/cpCache.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/cpCache.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -27,6 +27,7 @@
 
 #include "interpreter/bytecodes.hpp"
 #include "memory/allocation.hpp"
+#include "runtime/orderAccess.hpp"
 #include "utilities/array.hpp"
 
 class PSPromotionManager;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/instanceClassLoaderKlass.cpp
--- a/src/share/vm/oops/instanceClassLoaderKlass.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/instanceClassLoaderKlass.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/genOopClosures.inline.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/instanceClassLoaderKlass.hpp"
@@ -44,12 +45,6 @@
 #include "oops/oop.pcgc.inline.hpp"
 #endif // INCLUDE_ALL_GCS
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 // Macro to define InstanceClassLoaderKlass::oop_oop_iterate for virtual/nonvirtual for
 // all closures.  Macros calling macros above for each oop size.
 // Since ClassLoader objects have only a pointer to the loader_data, they are not
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/instanceKlass.cpp
--- a/src/share/vm/oops/instanceKlass.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/instanceKlass.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -35,6 +35,7 @@
 #include "jvmtifiles/jvmti.h"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/heapInspection.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/metadataFactory.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/fieldStreams.hpp"
@@ -54,6 +55,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/thread.inline.hpp"
 #include "services/classLoadingService.hpp"
 #include "services/threadService.hpp"
@@ -287,6 +289,7 @@
   set_static_oop_field_count(0);
   set_nonstatic_field_size(0);
   set_is_marked_dependent(false);
+  set_has_unloaded_dependent(false);
   set_init_state(InstanceKlass::allocated);
   set_init_thread(NULL);
   set_reference_type(rt);
@@ -1817,6 +1820,9 @@
   return id;
 }
 
+int nmethodBucket::decrement() {  // Atomically adds -1 to _count and returns Atomic::add's result.
+  return Atomic::add(-1, (volatile int *)&_count);
+}
 
 //
 // Walk the list of dependent nmethods searching for nmethods which
@@ -1831,7 +1837,7 @@
     nmethod* nm = b->get_nmethod();
     // since dependencies aren't removed until an nmethod becomes a zombie,
     // the dependency list may contain nmethods which aren't alive.
-    if (nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) {
+    if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) {
       if (TraceDependencies) {
         ResourceMark rm;
         tty->print_cr("Marked for deoptimization");
@@ -1848,6 +1854,43 @@
   return found;
 }
 
+void InstanceKlass::clean_dependent_nmethods() {  // Prunes zero-count buckets from _dependencies.
+  assert_locked_or_safepoint(CodeCache_lock);
+  // Nothing is scanned for removal unless has_unloaded_dependent() is set.
+  if (has_unloaded_dependent()) {
+    nmethodBucket* b = _dependencies;
+    nmethodBucket* last = NULL;
+    while (b != NULL) {
+      assert(b->count() >= 0, err_msg("bucket count: %d", b->count()));
+      // Fetch the successor before b can be deleted below.
+      nmethodBucket* next = b->next();
+
+      if (b->count() == 0) {
+        if (last == NULL) {
+          _dependencies = next;
+        } else {
+          last->set_next(next);
+        }
+        delete b;
+        // last stays the same: it is still the node preceding next.
+      } else {
+        last = b;
+      }
+
+      b = next;
+    }
+    set_has_unloaded_dependent(false);  // Every empty bucket has been removed.
+  }
+#ifdef ASSERT
+  else {
+    // Verification (ASSERT builds only): flag clear implies no empty buckets.
+    for (nmethodBucket* b = _dependencies; b != NULL; b = b->next()) {
+      assert(b->count() >= 0, err_msg("bucket count: %d", b->count()));
+      assert(b->count() != 0, "empty buckets need to be cleaned");
+    }
+  }
+#endif
+}
 
 //
 // Add an nmethodBucket to the list of dependencies for this nmethod.
@@ -1882,13 +1925,10 @@
   nmethodBucket* last = NULL;
   while (b != NULL) {
     if (nm == b->get_nmethod()) {
-      if (b->decrement() == 0) {
-        if (last == NULL) {
-          _dependencies = b->next();
-        } else {
-          last->set_next(b->next());
-        }
-        delete b;
+      int val = b->decrement();
+      guarantee(val >= 0, err_msg("Underflow: %d", val));
+      if (val == 0) {
+        set_has_unloaded_dependent(true);
       }
       return;
     }
@@ -1927,6 +1967,10 @@
   nmethodBucket* b = _dependencies;
   while (b != NULL) {
     if (nm == b->get_nmethod()) {
+#ifdef ASSERT
+      int count = b->count();
+      assert(count >= 0, err_msg("count shouldn't be negative: %d", count));
+#endif
       return true;
     }
     b = b->next();
@@ -2131,12 +2175,6 @@
 // closure's do_metadata() method dictates whether the given closure should be
 // applied to the klass ptr in the object header.
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 #define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)        \
                                                                              \
 int InstanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \
@@ -2160,10 +2198,9 @@
 int InstanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj,                \
                                               OopClosureType* closure) {        \
   SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik); \
-  /* header */                                                                  \
-  if_do_metadata_checked(closure, nv_suffix) {                                  \
-    closure->do_klass##nv_suffix(obj->klass());                                 \
-  }                                                                             \
+                                                                                \
+  assert_should_ignore_metadata(closure, nv_suffix);                            \
+                                                                                \
   /* instance variables */                                                      \
   InstanceKlass_OOP_MAP_REVERSE_ITERATE(                                        \
     obj,                                                                        \
@@ -2232,7 +2269,7 @@
 #endif // INCLUDE_ALL_GCS
 
 void InstanceKlass::clean_implementors_list(BoolObjectClosure* is_alive) {
-  assert(is_loader_alive(is_alive), "this klass should be live");
+  assert(class_loader_data()->is_alive(is_alive), "this klass should be live");
   if (is_interface()) {
     if (ClassUnloading) {
       Klass* impl = implementor();
@@ -3041,8 +3078,7 @@
         offset          <= (juint) value->length() &&
         offset + length <= (juint) value->length()) {
       st->print(BULLET"string: ");
-      Handle h_obj(obj);
-      java_lang_String::print(h_obj, st);
+      java_lang_String::print(obj, st);
       st->cr();
       if (!WizardMode)  return;  // that is enough
     }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/instanceKlass.hpp
--- a/src/share/vm/oops/instanceKlass.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/instanceKlass.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -226,6 +226,7 @@
   // _is_marked_dependent can be set concurrently, thus cannot be part of the
   // _misc_flags.
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
+  bool            _has_unloaded_dependent;
 
   enum {
     _misc_rewritten            = 1 << 0, // methods rewritten.
@@ -473,6 +474,9 @@
   bool is_marked_dependent() const         { return _is_marked_dependent; }
   void set_is_marked_dependent(bool value) { _is_marked_dependent = value; }
 
+  bool has_unloaded_dependent() const         { return _has_unloaded_dependent; }
+  void set_has_unloaded_dependent(bool value) { _has_unloaded_dependent = value; }
+
   // initialization (virtuals from Klass)
   bool should_be_initialized() const;  // means that initialize should be called
   void initialize(TRAPS);
@@ -946,6 +950,7 @@
 
   void clean_implementors_list(BoolObjectClosure* is_alive);
   void clean_method_data(BoolObjectClosure* is_alive);
+  void clean_dependent_nmethods();
 
   // Explicit metaspace deallocation of fields
   // For RedefineClasses and class file parsing errors, we need to deallocate
@@ -1234,7 +1239,7 @@
   }
   int count()                             { return _count; }
   int increment()                         { _count += 1; return _count; }
-  int decrement()                         { _count -= 1; assert(_count >= 0, "don't underflow"); return _count; }
+  int decrement();
   nmethodBucket* next()                   { return _next; }
   void set_next(nmethodBucket* b)         { _next = b; }
   nmethod* get_nmethod()                  { return _nmethod; }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/instanceMirrorKlass.cpp
--- a/src/share/vm/oops/instanceMirrorKlass.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/instanceMirrorKlass.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/genOopClosures.inline.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/instanceMirrorKlass.hpp"
@@ -241,12 +242,6 @@
   return oop_size(obj);                                                               \
 
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 // Macro to define InstanceMirrorKlass::oop_oop_iterate for virtual/nonvirtual for
 // all closures.  Macros calling macros above for each oop size.
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/klass.cpp
--- a/src/share/vm/oops/klass.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/klass.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -36,11 +36,13 @@
 #include "oops/instanceKlass.hpp"
 #include "oops/klass.inline.hpp"
 #include "oops/oop.inline2.hpp"
-#include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "trace/traceMacros.hpp"
 #include "utilities/stack.hpp"
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/parallelScavenge/psParallelCompact.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
@@ -158,7 +160,12 @@
   _primary_supers[0] = k;
   set_super_check_offset(in_bytes(primary_supers_offset()));
 
-  set_java_mirror(NULL);
+  // The constructor is used from init_self_patching_vtbl_list,
+  // which doesn't zero out the memory before calling the constructor.
+  // Need to set the field explicitly to not hit an assert that the field
+  // should be NULL before setting it.
+  _java_mirror = NULL;
+
   set_modifier_flags(0);
   set_layout_helper(Klass::_lh_neutral_value);
   set_name(NULL);
@@ -390,7 +397,7 @@
   return mirror_alive;
 }
 
-void Klass::clean_weak_klass_links(BoolObjectClosure* is_alive) {
+void Klass::clean_weak_klass_links(BoolObjectClosure* is_alive, bool clean_alive_klasses) {
   if (!ClassUnloading) {
     return;
   }
@@ -435,7 +442,7 @@
     }
 
     // Clean the implementors list and method data.
-    if (current->oop_is_instance()) {
+    if (clean_alive_klasses && current->oop_is_instance()) {
       InstanceKlass* ik = InstanceKlass::cast(current);
       ik->clean_implementors_list(is_alive);
       ik->clean_method_data(is_alive);
@@ -447,12 +454,18 @@
   record_modified_oops();
 }
 
-void Klass::klass_update_barrier_set_pre(void* p, oop v) {
-  // This barrier used by G1, where it's used remember the old oop values,
-  // so that we don't forget any objects that were live at the snapshot at
-  // the beginning. This function is only used when we write oops into
-  // Klasses. Since the Klasses are used as roots in G1, we don't have to
-  // do anything here.
+// Pre-write barrier, only used when we write oops into Klasses. G1 uses it to
+// remember the old oop value, so that we don't forget any objects that were live
+// at the snapshot at the beginning. p is the field being written; v is unused here.
+void Klass::klass_update_barrier_set_pre(oop* p, oop v) {
+#if INCLUDE_ALL_GCS
+  if (UseG1GC) {  // no-op for every other collector
+    oop obj = *p;  // value still stored in the field, i.e. the one about to be overwritten
+    if (obj != NULL) {  // nothing to remember if the field was empty
+      G1SATBCardTableModRefBS::enqueue(obj);
+    }
+  }
+#endif
 }
 
 void Klass::klass_oop_store(oop* p, oop v) {
@@ -463,7 +476,7 @@
   if (always_do_update_barrier) {
     klass_oop_store((volatile oop*)p, v);
   } else {
-    klass_update_barrier_set_pre((void*)p, v);
+    klass_update_barrier_set_pre(p, v);
     *p = v;
     klass_update_barrier_set(v);
   }
@@ -473,7 +486,7 @@
   assert(!Universe::heap()->is_in_reserved((void*)p), "Should store pointer into metadata");
   assert(v == NULL || Universe::heap()->is_in_reserved((void*)v), "Should store pointer to an object");
 
-  klass_update_barrier_set_pre((void*)p, v);
+  klass_update_barrier_set_pre((oop*)p, v); // Cast away volatile.
   OrderAccess::release_store_ptr(p, v);
   klass_update_barrier_set(v);
 }
@@ -698,3 +711,21 @@
 }
 
 #endif
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+class TestKlass {  // Debug-only (#ifndef PRODUCT) self-checks for Klass type predicates.
+ public:
+  static void test_oop_is_instanceClassLoader() {  // one positive and one negative case for the predicate
+    assert(SystemDictionary::ClassLoader_klass()->oop_is_instanceClassLoader(), "ClassLoader klass must be an instanceClassLoader klass");
+    assert(!SystemDictionary::String_klass()->oop_is_instanceClassLoader(), "String klass must not be an instanceClassLoader klass");
+  }
+};
+
+void TestKlass_test() {  // Free-function entry point that runs every TestKlass check (currently one).
+  TestKlass::test_oop_is_instanceClassLoader();
+}
+
+#endif
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/klass.hpp
--- a/src/share/vm/oops/klass.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/klass.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,7 +32,6 @@
 #include "oops/klassPS.hpp"
 #include "oops/metadata.hpp"
 #include "oops/oop.hpp"
-#include "runtime/orderAccess.hpp"
 #include "trace/traceMacros.hpp"
 #include "utilities/accessFlags.hpp"
 #include "utilities/macros.hpp"
@@ -500,6 +499,7 @@
   virtual bool oop_is_objArray_slow()       const { return false; }
   virtual bool oop_is_typeArray_slow()      const { return false; }
  public:
+  virtual bool oop_is_instanceClassLoader() const { return false; }
   virtual bool oop_is_instanceMirror()      const { return false; }
   virtual bool oop_is_instanceRef()         const { return false; }
 
@@ -583,36 +583,9 @@
   // The is_alive closure passed in depends on the Garbage Collector used.
   bool is_loader_alive(BoolObjectClosure* is_alive);
 
-  static void clean_weak_klass_links(BoolObjectClosure* is_alive);
-
-  // Prefetch within oop iterators.  This is a macro because we
-  // can't guarantee that the compiler will inline it.  In 64-bit
-  // it generally doesn't.  Signature is
-  //
-  // static void prefetch_beyond(oop* const start,
-  //                             oop* const end,
-  //                             const intx foffset,
-  //                             const Prefetch::style pstyle);
-#define prefetch_beyond(start, end, foffset, pstyle) {   \
-    const intx foffset_ = (foffset);                     \
-    const Prefetch::style pstyle_ = (pstyle);            \
-    assert(foffset_ > 0, "prefetch beyond, not behind"); \
-    if (pstyle_ != Prefetch::do_none) {                  \
-      oop* ref = (start);                                \
-      if (ref < (end)) {                                 \
-        switch (pstyle_) {                               \
-        case Prefetch::do_read:                          \
-          Prefetch::read(*ref, foffset_);                \
-          break;                                         \
-        case Prefetch::do_write:                         \
-          Prefetch::write(*ref, foffset_);               \
-          break;                                         \
-        default:                                         \
-          ShouldNotReachHere();                          \
-          break;                                         \
-        }                                                \
-      }                                                  \
-    }                                                    \
+  static void clean_weak_klass_links(BoolObjectClosure* is_alive, bool clean_alive_klasses = true);
+  static void clean_subklass_tree(BoolObjectClosure* is_alive) {  // clean_weak_klass_links minus the per-InstanceKlass implementors/method-data cleaning
+    clean_weak_klass_links(is_alive, false /* clean_alive_klasses */);
   }
 
   // iterators
@@ -720,7 +693,7 @@
  private:
   // barriers used by klass_oop_store
   void klass_update_barrier_set(oop v);
-  void klass_update_barrier_set_pre(void* p, oop v);
+  void klass_update_barrier_set_pre(oop* p, oop v);
 };
 
 #endif // SHARE_VM_OOPS_KLASS_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/method.cpp
--- a/src/share/vm/oops/method.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/method.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -49,6 +49,7 @@
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/relocator.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/signature.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/methodData.cpp
--- a/src/share/vm/oops/methodData.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/methodData.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -34,6 +34,7 @@
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/objArrayKlass.cpp
--- a/src/share/vm/oops/objArrayKlass.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/objArrayKlass.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,6 +29,7 @@
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/genOopClosures.inline.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/metadataFactory.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.inline.hpp"
@@ -42,6 +43,7 @@
 #include "oops/symbol.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
@@ -475,12 +477,6 @@
 }
 #endif // INCLUDE_ALL_GCS
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 #define ObjArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)           \
                                                                                 \
 int ObjArrayKlass::oop_oop_iterate##nv_suffix(oop obj,                          \
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/oop.hpp
--- a/src/share/vm/oops/oop.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/oop.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -109,12 +109,13 @@
   int size_given_klass(Klass* klass);
 
   // type test operations (inlined in oop.inline.h)
-  bool is_instance()           const;
-  bool is_instanceMirror()     const;
-  bool is_instanceRef()        const;
-  bool is_array()              const;
-  bool is_objArray()           const;
-  bool is_typeArray()          const;
+  bool is_instance()            const;
+  bool is_instanceMirror()      const;
+  bool is_instanceClassLoader() const;
+  bool is_instanceRef()         const;
+  bool is_array()               const;
+  bool is_objArray()            const;
+  bool is_typeArray()           const;
 
  private:
   // field addresses in oop
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/oop.inline.hpp
--- a/src/share/vm/oops/oop.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/oop.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -38,7 +38,8 @@
 #include "oops/klass.inline.hpp"
 #include "oops/markOop.inline.hpp"
 #include "oops/oop.hpp"
-#include "runtime/atomic.hpp"
+#include "runtime/atomic.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 #include "utilities/macros.hpp"
 #ifdef TARGET_ARCH_x86
@@ -147,12 +148,13 @@
 
 inline bool oopDesc::is_a(Klass* k)        const { return klass()->is_subtype_of(k); }
 
-inline bool oopDesc::is_instance()           const { return klass()->oop_is_instance(); }
-inline bool oopDesc::is_instanceMirror()     const { return klass()->oop_is_instanceMirror(); }
-inline bool oopDesc::is_instanceRef()        const { return klass()->oop_is_instanceRef(); }
-inline bool oopDesc::is_array()              const { return klass()->oop_is_array(); }
-inline bool oopDesc::is_objArray()           const { return klass()->oop_is_objArray(); }
-inline bool oopDesc::is_typeArray()          const { return klass()->oop_is_typeArray(); }
+inline bool oopDesc::is_instance()            const { return klass()->oop_is_instance(); }
+inline bool oopDesc::is_instanceClassLoader() const { return klass()->oop_is_instanceClassLoader(); }
+inline bool oopDesc::is_instanceMirror()      const { return klass()->oop_is_instanceMirror(); }
+inline bool oopDesc::is_instanceRef()         const { return klass()->oop_is_instanceRef(); }
+inline bool oopDesc::is_array()               const { return klass()->oop_is_array(); }
+inline bool oopDesc::is_objArray()            const { return klass()->oop_is_objArray(); }
+inline bool oopDesc::is_typeArray()           const { return klass()->oop_is_typeArray(); }
 
 inline void*     oopDesc::field_base(int offset)        const { return (void*)&((char*)this)[offset]; }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/oop.pcgc.inline.hpp
--- a/src/share/vm/oops/oop.pcgc.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/oop.pcgc.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -54,8 +54,6 @@
   klass()->oop_follow_contents(cm, this);
 }
 
-// Used by parallel old GC.
-
 inline oop oopDesc::forward_to_atomic(oop p) {
   assert(ParNewGeneration::is_legal_forward_ptr(p),
          "illegal forwarding pointer value.");
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/typeArrayKlass.cpp
--- a/src/share/vm/oops/typeArrayKlass.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/typeArrayKlass.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -39,6 +39,7 @@
 #include "oops/typeArrayKlass.hpp"
 #include "oops/typeArrayOop.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "utilities/macros.hpp"
 
 bool TypeArrayKlass::compute_is_subtype_of(Klass* k) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/oops/typeArrayOop.hpp
--- a/src/share/vm/oops/typeArrayOop.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/oops/typeArrayOop.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -27,39 +27,7 @@
 
 #include "oops/arrayOop.hpp"
 #include "oops/typeArrayKlass.hpp"
-#ifdef TARGET_OS_ARCH_linux_x86
-# include "orderAccess_linux_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_sparc
-# include "orderAccess_linux_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_zero
-# include "orderAccess_linux_zero.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_x86
-# include "orderAccess_solaris_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_sparc
-# include "orderAccess_solaris_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_windows_x86
-# include "orderAccess_windows_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_arm
-# include "orderAccess_linux_arm.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_ppc
-# include "orderAccess_linux_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_aix_ppc
-# include "orderAccess_aix_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_x86
-# include "orderAccess_bsd_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_zero
-# include "orderAccess_bsd_zero.inline.hpp"
-#endif
+#include "runtime/orderAccess.inline.hpp"
 
 // A typeArrayOop is an array containing basic types (non oop elements).
 // It is used for arrays of {characters, singles, doubles, bytes, shorts, integers, longs}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/callGenerator.cpp
--- a/src/share/vm/opto/callGenerator.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/callGenerator.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -710,7 +710,15 @@
   Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
   iophi->set_req(2, slow_map->i_o());
   kit.set_i_o(gvn.transform(iophi));
+  // Merge memory
   kit.merge_memory(slow_map->merged_memory(), region, 2);
+  // Transform new memory Phis.
+  for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) {
+    Node* phi = mms.memory();
+    if (phi->is_Phi() && phi->in(0) == region) {
+      mms.set_memory(gvn.transform(phi));
+    }
+  }
   uint tos = kit.jvms()->stkoff() + kit.sp();
   uint limit = slow_map->req();
   for (uint i = TypeFunc::Parms; i < limit; i++) {
@@ -864,15 +872,15 @@
 }
 
 
-//------------------------PredictedIntrinsicGenerator------------------------------
-// Internal class which handles all predicted Intrinsic calls.
-class PredictedIntrinsicGenerator : public CallGenerator {
+//------------------------PredicatedIntrinsicGenerator------------------------------
+// Internal class which handles all predicated Intrinsic calls.
+class PredicatedIntrinsicGenerator : public CallGenerator {
   CallGenerator* _intrinsic;
   CallGenerator* _cg;
 
 public:
-  PredictedIntrinsicGenerator(CallGenerator* intrinsic,
-                              CallGenerator* cg)
+  PredicatedIntrinsicGenerator(CallGenerator* intrinsic,
+                               CallGenerator* cg)
     : CallGenerator(cg->method())
   {
     _intrinsic = intrinsic;
@@ -887,103 +895,182 @@
 };
 
 
-CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
-                                                      CallGenerator* cg) {
-  return new PredictedIntrinsicGenerator(intrinsic, cg);
+CallGenerator* CallGenerator::for_predicated_intrinsic(CallGenerator* intrinsic,
+                                                       CallGenerator* cg) {
+  return new PredicatedIntrinsicGenerator(intrinsic, cg);
 }
 
 
-JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* PredicatedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+  // The code we want to generate here is:
+  //    if (receiver == NULL)
+  //        uncommon_Trap
+  //    if (predicate(0))
+  //        do_intrinsic(0)
+  //    else
+  //    if (predicate(1))
+  //        do_intrinsic(1)
+  //    ...
+  //    else
+  //        do_java_comp
+
   GraphKit kit(jvms);
   PhaseGVN& gvn = kit.gvn();
 
   CompileLog* log = kit.C->log();
   if (log != NULL) {
-    log->elem("predicted_intrinsic bci='%d' method='%d'",
+    log->elem("predicated_intrinsic bci='%d' method='%d'",
               jvms->bci(), log->identify(method()));
   }
 
-  Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms());
-  if (kit.failing())
-    return NULL;  // might happen because of NodeCountInliningCutoff
-
-  SafePointNode* slow_map = NULL;
-  JVMState* slow_jvms;
-  if (slow_ctl != NULL) {
-    PreserveJVMState pjvms(&kit);
-    kit.set_control(slow_ctl);
-    if (!kit.stopped()) {
-      slow_jvms = _cg->generate(kit.sync_jvms(), parent_parser);
-      if (kit.failing())
-        return NULL;  // might happen because of NodeCountInliningCutoff
-      assert(slow_jvms != NULL, "must be");
-      kit.add_exception_states_from(slow_jvms);
-      kit.set_map(slow_jvms->map());
-      if (!kit.stopped())
-        slow_map = kit.stop();
+  if (!method()->is_static()) {
+    // We need an explicit receiver null_check before checking its type in predicate.
+    // We share a map with the caller, so its JVMS gets adjusted.
+    Node* receiver = kit.null_check_receiver_before_call(method());
+    if (kit.stopped()) {
+      return kit.transfer_exceptions_into_jvms();
     }
   }
 
-  if (kit.stopped()) {
-    // Predicate is always false.
-    kit.set_jvms(slow_jvms);
+  int n_predicates = _intrinsic->predicates_count();
+  assert(n_predicates > 0, "sanity");
+
+  JVMState** result_jvms = NEW_RESOURCE_ARRAY(JVMState*, (n_predicates+1));
+
+  // Region for normal compilation code if intrinsic failed.
+  Node* slow_region = new (kit.C) RegionNode(1);
+
+  int results = 0;
+  for (int predicate = 0; (predicate < n_predicates) && !kit.stopped(); predicate++) {
+#ifdef ASSERT
+    JVMState* old_jvms = kit.jvms();
+    SafePointNode* old_map = kit.map();
+    Node* old_io  = old_map->i_o();
+    Node* old_mem = old_map->memory();
+    Node* old_exc = old_map->next_exception();
+#endif
+    Node* else_ctrl = _intrinsic->generate_predicate(kit.sync_jvms(), predicate);
+#ifdef ASSERT
+    // Assert(no_new_memory && no_new_io && no_new_exceptions) after generate_predicate.
+    assert(old_jvms == kit.jvms(), "generate_predicate should not change jvm state");
+    SafePointNode* new_map = kit.map();
+    assert(old_io  == new_map->i_o(), "generate_predicate should not change i_o");
+    assert(old_mem == new_map->memory(), "generate_predicate should not change memory");
+    assert(old_exc == new_map->next_exception(), "generate_predicate should not add exceptions");
+#endif
+    if (!kit.stopped()) {
+      PreserveJVMState pjvms(&kit);
+      // Generate intrinsic code:
+      JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser);
+      if (new_jvms == NULL) {
+        // Intrinsic failed, use normal compilation path for this predicate.
+        slow_region->add_req(kit.control());
+      } else {
+        kit.add_exception_states_from(new_jvms);
+        kit.set_jvms(new_jvms);
+        if (!kit.stopped()) {
+          result_jvms[results++] = kit.jvms();
+        }
+      }
+    }
+    if (else_ctrl == NULL) {
+      else_ctrl = kit.C->top();
+    }
+    kit.set_control(else_ctrl);
+  }
+  if (!kit.stopped()) {
+    // Final 'else' after predicates.
+    slow_region->add_req(kit.control());
+  }
+  if (slow_region->req() > 1) {
+    PreserveJVMState pjvms(&kit);
+    // Generate normal compilation code:
+    kit.set_control(gvn.transform(slow_region));
+    JVMState* new_jvms = _cg->generate(kit.sync_jvms(), parent_parser);
+    if (kit.failing())
+      return NULL;  // might happen because of NodeCountInliningCutoff
+    assert(new_jvms != NULL, "must be");
+    kit.add_exception_states_from(new_jvms);
+    kit.set_jvms(new_jvms);
+    if (!kit.stopped()) {
+      result_jvms[results++] = kit.jvms();
+    }
+  }
+
+  if (results == 0) {
+    // All paths ended in uncommon traps.
+    (void) kit.stop();
     return kit.transfer_exceptions_into_jvms();
   }
 
-  // Generate intrinsic code:
-  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser);
-  if (new_jvms == NULL) {
-    // Intrinsic failed, so use slow code or make a direct call.
-    if (slow_map == NULL) {
-      CallGenerator* cg = CallGenerator::for_direct_call(method());
-      new_jvms = cg->generate(kit.sync_jvms(), parent_parser);
-    } else {
-      kit.set_jvms(slow_jvms);
-      return kit.transfer_exceptions_into_jvms();
-    }
-  }
-  kit.add_exception_states_from(new_jvms);
-  kit.set_jvms(new_jvms);
-
-  // Need to merge slow and fast?
-  if (slow_map == NULL) {
-    // The fast path is the only path remaining.
+  if (results == 1) { // Only one path
+    kit.set_jvms(result_jvms[0]);
     return kit.transfer_exceptions_into_jvms();
   }
 
-  if (kit.stopped()) {
-    // Intrinsic method threw an exception, so it's just the slow path after all.
-    kit.set_jvms(slow_jvms);
-    return kit.transfer_exceptions_into_jvms();
+  // Merge all paths.
+  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+  RegionNode* region = new (kit.C) RegionNode(results + 1);
+  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+  for (int i = 0; i < results; i++) {
+    JVMState* jvms = result_jvms[i];
+    int path = i + 1;
+    SafePointNode* map = jvms->map();
+    region->init_req(path, map->control());
+    iophi->set_req(path, map->i_o());
+    if (i == 0) {
+      kit.set_jvms(jvms);
+    } else {
+      kit.merge_memory(map->merged_memory(), region, path);
+    }
+  }
+  kit.set_control(gvn.transform(region));
+  kit.set_i_o(gvn.transform(iophi));
+  // Transform new memory Phis.
+  for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) {
+    Node* phi = mms.memory();
+    if (phi->is_Phi() && phi->in(0) == region) {
+      mms.set_memory(gvn.transform(phi));
+    }
   }
 
-  // Finish the diamond.
-  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
-  RegionNode* region = new (kit.C) RegionNode(3);
-  region->init_req(1, kit.control());
-  region->init_req(2, slow_map->control());
-  kit.set_control(gvn.transform(region));
-  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
-  iophi->set_req(2, slow_map->i_o());
-  kit.set_i_o(gvn.transform(iophi));
-  kit.merge_memory(slow_map->merged_memory(), region, 2);
+  // Merge debug info.
+  Node** ins = NEW_RESOURCE_ARRAY(Node*, results);
   uint tos = kit.jvms()->stkoff() + kit.sp();
-  uint limit = slow_map->req();
+  Node* map = kit.map();
+  uint limit = map->req();
   for (uint i = TypeFunc::Parms; i < limit; i++) {
     // Skip unused stack slots; fast forward to monoff();
     if (i == tos) {
       i = kit.jvms()->monoff();
       if( i >= limit ) break;
     }
-    Node* m = kit.map()->in(i);
-    Node* n = slow_map->in(i);
-    if (m != n) {
-      const Type* t = gvn.type(m)->meet_speculative(gvn.type(n));
-      Node* phi = PhiNode::make(region, m, t);
-      phi->set_req(2, n);
-      kit.map()->set_req(i, gvn.transform(phi));
+    Node* n = map->in(i);
+    ins[0] = n;
+    const Type* t = gvn.type(n);
+    bool needs_phi = false;
+    for (int j = 1; j < results; j++) {
+      JVMState* jvms = result_jvms[j];
+      Node* jmap = jvms->map();
+      Node* m = NULL;
+      if (jmap->req() > i) {
+        m = jmap->in(i);
+        if (m != n) {
+          needs_phi = true;
+          t = t->meet_speculative(gvn.type(m));
+        }
+      }
+      ins[j] = m;
+    }
+    if (needs_phi) {
+      Node* phi = PhiNode::make(region, n, t);
+      for (int j = 1; j < results; j++) {
+        phi->set_req(j + 1, ins[j]);
+      }
+      map->set_req(i, gvn.transform(phi));
     }
   }
+
   return kit.transfer_exceptions_into_jvms();
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/callGenerator.hpp
--- a/src/share/vm/opto/callGenerator.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/callGenerator.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -63,8 +63,9 @@
   virtual bool      is_virtual() const          { return false; }
   // is_deferred: The decision whether to inline or not is deferred.
   virtual bool      is_deferred() const         { return false; }
-  // is_predicted: Uses an explicit check against a predicted type.
-  virtual bool      is_predicted() const        { return false; }
+  // is_predicated: Uses an explicit check (predicate).
+  virtual bool      is_predicated() const       { return false; }
+  virtual int       predicates_count() const    { return 0; }
   // is_trap: Does not return to the caller.  (E.g., uncommon trap.)
   virtual bool      is_trap() const             { return false; }
   // does_virtual_dispatch: Should try inlining as normal method first.
@@ -157,9 +158,9 @@
   // Registry for intrinsics:
   static CallGenerator* for_intrinsic(ciMethod* m);
   static void register_intrinsic(ciMethod* m, CallGenerator* cg);
-  static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
-                                                CallGenerator* cg);
-  virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
+  static CallGenerator* for_predicated_intrinsic(CallGenerator* intrinsic,
+                                                 CallGenerator* cg);
+  virtual Node* generate_predicate(JVMState* jvms, int predicate) { return NULL; };
 
   virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/doCall.cpp
--- a/src/share/vm/opto/doCall.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/doCall.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -115,12 +115,12 @@
   if (allow_inline && allow_intrinsics) {
     CallGenerator* cg = find_intrinsic(callee, call_does_dispatch);
     if (cg != NULL) {
-      if (cg->is_predicted()) {
+      if (cg->is_predicated()) {
         // Code without intrinsic but, hopefully, inlined.
         CallGenerator* inline_cg = this->call_generator(callee,
               vtable_index, call_does_dispatch, jvms, allow_inline, prof_factor, speculative_receiver_type, false);
         if (inline_cg != NULL) {
-          cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
+          cg = CallGenerator::for_predicated_intrinsic(cg, inline_cg);
         }
       }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/escape.cpp
--- a/src/share/vm/opto/escape.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/escape.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -938,7 +938,13 @@
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
-                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
+                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0)
                   ))) {
             call->dump();
             fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/graphKit.cpp
--- a/src/share/vm/opto/graphKit.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/graphKit.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -2435,23 +2435,24 @@
     Node* new_slice = mms.memory2();
     if (old_slice != new_slice) {
       PhiNode* phi;
-      if (new_slice->is_Phi() && new_slice->as_Phi()->region() == region) {
-        phi = new_slice->as_Phi();
-        #ifdef ASSERT
-        if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region)
-          old_slice = old_slice->in(new_path);
-        // Caller is responsible for ensuring that any pre-existing
-        // phis are already aware of old memory.
-        int old_path = (new_path > 1) ? 1 : 2;  // choose old_path != new_path
-        assert(phi->in(old_path) == old_slice, "pre-existing phis OK");
-        #endif
-        mms.set_memory(phi);
+      if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region) {
+        if (mms.is_empty()) {
+          // clone base memory Phi's inputs for this memory slice
+          assert(old_slice == mms.base_memory(), "sanity");
+          phi = PhiNode::make(region, NULL, Type::MEMORY, mms.adr_type(C));
+          _gvn.set_type(phi, Type::MEMORY);
+          for (uint i = 1; i < phi->req(); i++) {
+            phi->init_req(i, old_slice->in(i));
+          }
+        } else {
+          phi = old_slice->as_Phi(); // Phi was generated already
+        }
       } else {
         phi = PhiNode::make(region, old_slice, Type::MEMORY, mms.adr_type(C));
         _gvn.set_type(phi, Type::MEMORY);
-        phi->set_req(new_path, new_slice);
-        mms.set_memory(_gvn.transform(phi));  // assume it is complete
       }
+      phi->set_req(new_path, new_slice);
+      mms.set_memory(phi);
     }
   }
 }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/lcm.cpp
--- a/src/share/vm/opto/lcm.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/lcm.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -484,7 +484,9 @@
         iop == Op_CreateEx ||   // Create-exception must start block
         iop == Op_CheckCastPP
         ) {
-      worklist.map(i,worklist.pop());
+      // select the node n
+      // remove n from worklist and retain the order of remaining nodes
+      worklist.remove((uint)i);
       return n;
     }
 
@@ -570,7 +572,9 @@
   assert(idx >= 0, "index should be set");
   Node *n = worklist[(uint)idx];      // Get the winner
 
-  worklist.map((uint)idx, worklist.pop());     // Compress worklist
+  // select the node n
+  // remove n from worklist and retain the order of remaining nodes
+  worklist.remove((uint)idx);
   return n;
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/library_call.cpp
--- a/src/share/vm/opto/library_call.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/library_call.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -46,25 +46,28 @@
  public:
  private:
   bool             _is_virtual;
-  bool             _is_predicted;
   bool             _does_virtual_dispatch;
+  int8_t           _predicates_count;  // Intrinsic is predicated by several conditions
+  int8_t           _last_predicate; // Last generated predicate
   vmIntrinsics::ID _intrinsic_id;
 
  public:
-  LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, bool does_virtual_dispatch, vmIntrinsics::ID id)
+  LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
     : InlineCallGenerator(m),
       _is_virtual(is_virtual),
-      _is_predicted(is_predicted),
       _does_virtual_dispatch(does_virtual_dispatch),
+      _predicates_count((int8_t)predicates_count),
+      _last_predicate((int8_t)-1),
       _intrinsic_id(id)
   {
   }
   virtual bool is_intrinsic() const { return true; }
   virtual bool is_virtual()   const { return _is_virtual; }
-  virtual bool is_predicted()   const { return _is_predicted; }
+  virtual bool is_predicated() const { return _predicates_count > 0; }
+  virtual int  predicates_count() const { return _predicates_count; }
   virtual bool does_virtual_dispatch()   const { return _does_virtual_dispatch; }
   virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
-  virtual Node* generate_predicate(JVMState* jvms);
+  virtual Node* generate_predicate(JVMState* jvms, int predicate);
   vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
 };
 
@@ -107,8 +110,8 @@
   vmIntrinsics::ID  intrinsic_id() const { return _intrinsic->intrinsic_id(); }
   ciMethod*         callee()    const    { return _intrinsic->method(); }
 
-  bool try_to_inline();
-  Node* try_to_predicate();
+  bool  try_to_inline(int predicate);
+  Node* try_to_predicate(int predicate);
 
   void push_result() {
     // Push the result onto the stack.
@@ -307,6 +310,14 @@
   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
   Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
+  bool inline_sha_implCompress(vmIntrinsics::ID id);
+  bool inline_digestBase_implCompressMB(int predicate);
+  bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
+                                 bool long_state, address stubAddr, const char *stubName,
+                                 Node* src_start, Node* ofs, Node* limit);
+  Node* get_state_from_sha_object(Node *sha_object);
+  Node* get_state_from_sha5_object(Node *sha_object);
+  Node* inline_digestBase_implCompressMB_predicate(int predicate);
   bool inline_encodeISOArray();
   bool inline_updateCRC32();
   bool inline_updateBytesCRC32();
@@ -367,7 +378,7 @@
     }
   }
 
-  bool is_predicted = false;
+  int predicates = 0;
   bool does_virtual_dispatch = false;
 
   switch (id) {
@@ -508,7 +519,24 @@
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     if (!UseAESIntrinsics) return NULL;
     // these two require the predicated logic
-    is_predicted = true;
+    predicates = 1;
+    break;
+
+  case vmIntrinsics::_sha_implCompress:
+    if (!UseSHA1Intrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_sha2_implCompress:
+    if (!UseSHA256Intrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_sha5_implCompress:
+    if (!UseSHA512Intrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_digestBase_implCompressMB:
+    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
+    predicates = 3;
     break;
 
   case vmIntrinsics::_updateCRC32:
@@ -577,7 +605,7 @@
     if (!InlineUnsafeOps)  return NULL;
   }
 
-  return new LibraryIntrinsic(m, is_virtual, is_predicted, does_virtual_dispatch, (vmIntrinsics::ID) id);
+  return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id);
 }
 
 //----------------------register_library_intrinsics-----------------------
@@ -601,7 +629,7 @@
   const int bci    = kit.bci();
 
   // Try to inline the intrinsic.
-  if (kit.try_to_inline()) {
+  if (kit.try_to_inline(_last_predicate)) {
     if (C->print_intrinsics() || C->print_inlining()) {
       C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
@@ -634,12 +662,13 @@
   return NULL;
 }
 
-Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
+Node* LibraryIntrinsic::generate_predicate(JVMState* jvms, int predicate) {
   LibraryCallKit kit(jvms, this);
   Compile* C = kit.C;
   int nodes = C->unique();
+  _last_predicate = predicate;
 #ifndef PRODUCT
-  assert(is_predicted(), "sanity");
+  assert(is_predicated() && predicate < predicates_count(), "sanity");
   if ((C->print_intrinsics() || C->print_inlining()) && Verbose) {
     char buf[1000];
     const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
@@ -649,10 +678,10 @@
   ciMethod* callee = kit.callee();
   const int bci    = kit.bci();
 
-  Node* slow_ctl = kit.try_to_predicate();
+  Node* slow_ctl = kit.try_to_predicate(predicate);
   if (!kit.failing()) {
     if (C->print_intrinsics() || C->print_inlining()) {
-      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual, predicate)" : "(intrinsic, predicate)");
     }
     C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
     if (C->log()) {
@@ -681,7 +710,7 @@
   return NULL;
 }
 
-bool LibraryCallKit::try_to_inline() {
+bool LibraryCallKit::try_to_inline(int predicate) {
   // Handle symbolic names for otherwise undistinguished boolean switches:
   const bool is_store       = true;
   const bool is_native_ptr  = true;
@@ -875,6 +904,14 @@
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
 
+  case vmIntrinsics::_sha_implCompress:
+  case vmIntrinsics::_sha2_implCompress:
+  case vmIntrinsics::_sha5_implCompress:
+    return inline_sha_implCompress(intrinsic_id());
+
+  case vmIntrinsics::_digestBase_implCompressMB:
+    return inline_digestBase_implCompressMB(predicate);
+
   case vmIntrinsics::_encodeISOArray:
     return inline_encodeISOArray();
 
@@ -898,7 +935,7 @@
   }
 }
 
-Node* LibraryCallKit::try_to_predicate() {
+Node* LibraryCallKit::try_to_predicate(int predicate) {
   if (!jvms()->has_method()) {
     // Root JVMState has a null method.
     assert(map()->memory()->Opcode() == Op_Parm, "");
@@ -912,6 +949,8 @@
     return inline_cipherBlockChaining_AESCrypt_predicate(false);
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     return inline_cipherBlockChaining_AESCrypt_predicate(true);
+  case vmIntrinsics::_digestBase_implCompressMB:
+    return inline_digestBase_implCompressMB_predicate(predicate);
 
   default:
     // If you get here, it may be that someone has added a new intrinsic
@@ -5866,7 +5905,12 @@
   BasicType bt = field->layout_type();
 
   // Build the resultant type of the load
-  const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+  const Type *type;
+  if (bt == T_OBJECT) {
+    type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+  } else {
+    type = Type::get_const_basic_type(bt);
+  }
 
   // Build the load.
   Node* loadedField = make_load(NULL, adr, type, bt, adr_type, MemNode::unordered, is_vol);
@@ -5996,7 +6040,7 @@
   assert(tinst != NULL, "CBC obj is null");
   assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
   ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
-  if (!klass_AESCrypt->is_loaded()) return false;
+  assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
 
   ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
   const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
@@ -6071,11 +6115,8 @@
 //    note cipher==plain is more conservative than the original java code but that's OK
 //
 Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
-  // First, check receiver for NULL since it is virtual method.
+  // The receiver was checked for NULL already.
   Node* objCBC = argument(0);
-  objCBC = null_check(objCBC);
-
-  if (stopped()) return NULL; // Always NULL
 
   // Load embeddedCipher field of CipherBlockChaining object.
   Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
@@ -6122,3 +6163,258 @@
   record_for_igvn(region);
   return _gvn.transform(region);
 }
+
+//------------------------------inline_sha_implCompress-----------------------
+//
+// Calculate SHA (i.e., SHA-1) for single-block byte[] array.
+// void sun.security.provider.SHA.implCompress(byte[] buf, int ofs)
+//
+// Calculate SHA2 (i.e., SHA-224 or SHA-256) for single-block byte[] array.
+// void sun.security.provider.SHA2.implCompress(byte[] buf, int ofs)
+//
+// Calculate SHA5 (i.e., SHA-384 or SHA-512) for single-block byte[] array.
+// void sun.security.provider.SHA5.implCompress(byte[] buf, int ofs)
+// Returns false if src is not known to be a byte[] or the 'state' field cannot be loaded.
+bool LibraryCallKit::inline_sha_implCompress(vmIntrinsics::ID id) {
+  assert(callee()->signature()->size() == 2, "sha_implCompress has 2 parameters");
+
+  Node* sha_obj = argument(0); // receiver; its 'state' field is loaded below
+  Node* src     = argument(1); // type oop (the byte[] buf)
+  Node* ofs     = argument(2); // type int
+
+  const Type* src_type = src->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  if (top_src  == NULL || top_src->klass()  == NULL) {
+    // src's type is not a known array type: give up
+    return false;
+  }
+  // Only a byte[] source is handled; give up for any other element type.
+  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_BYTE) {
+    return false;
+  }
+  // 'src_start' points to src array + offset
+  Node* src_start = array_element_address(src, ofs, src_elem);
+  Node* state = NULL;
+  address stubAddr;
+  const char *stubName;
+
+  switch(id) { // pick the state loader and stub matching the intrinsic id
+  case vmIntrinsics::_sha_implCompress:
+    assert(UseSHA1Intrinsics, "need SHA1 instruction support");
+    state = get_state_from_sha_object(sha_obj);
+    stubAddr = StubRoutines::sha1_implCompress();
+    stubName = "sha1_implCompress";
+    break;
+  case vmIntrinsics::_sha2_implCompress:
+    assert(UseSHA256Intrinsics, "need SHA256 instruction support");
+    state = get_state_from_sha_object(sha_obj);
+    stubAddr = StubRoutines::sha256_implCompress();
+    stubName = "sha256_implCompress";
+    break;
+  case vmIntrinsics::_sha5_implCompress:
+    assert(UseSHA512Intrinsics, "need SHA512 instruction support");
+    state = get_state_from_sha5_object(sha_obj);
+    stubAddr = StubRoutines::sha512_implCompress();
+    stubName = "sha512_implCompress";
+    break;
+  default:
+    fatal_unexpected_iid(id);
+    return false;
+  }
+  if (state == NULL) return false;
+
+  // Call the selected stub as a leaf runtime call with (src_start, state); it has no result.
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::sha_implCompress_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, state);
+
+  return true;
+}
+
+//------------------------------inline_digestBase_implCompressMB-----------------------
+//
+// Calculate SHA/SHA2/SHA5 for multi-block byte[] array.
+// int sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+// 'predicate' (0, 1 or 2) selects SHA, SHA2 or SHA5; returns false if that variant is disabled.
+bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) {
+  assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
+         "need SHA1/SHA256/SHA512 instruction support");
+  assert((uint)predicate < 3, "sanity");
+  assert(callee()->signature()->size() == 3, "digestBase_implCompressMB has 3 parameters");
+
+  Node* digestBase_obj = argument(0); // The receiver was checked for NULL already.
+  Node* src            = argument(1); // byte[] array
+  Node* ofs            = argument(2); // type int
+  Node* limit          = argument(3); // type int
+
+  const Type* src_type = src->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  if (top_src  == NULL || top_src->klass()  == NULL) {
+    // src's type is not a known array type: give up
+    return false;
+  }
+  // Only a byte[] source is handled; give up for any other element type.
+  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_BYTE) {
+    return false;
+  }
+  // 'src_start' points to src array + offset
+  Node* src_start = array_element_address(src, ofs, src_elem);
+
+  const char* klass_SHA_name = NULL;
+  const char* stub_name = NULL;
+  address     stub_addr = NULL;
+  bool        long_state = false;
+
+  switch (predicate) { // each case fills in the names only if its Use*Intrinsics flag is set
+  case 0:
+    if (UseSHA1Intrinsics) {
+      klass_SHA_name = "sun/security/provider/SHA";
+      stub_name = "sha1_implCompressMB";
+      stub_addr = StubRoutines::sha1_implCompressMB();
+    }
+    break;
+  case 1:
+    if (UseSHA256Intrinsics) {
+      klass_SHA_name = "sun/security/provider/SHA2";
+      stub_name = "sha256_implCompressMB";
+      stub_addr = StubRoutines::sha256_implCompressMB();
+    }
+    break;
+  case 2:
+    if (UseSHA512Intrinsics) {
+      klass_SHA_name = "sun/security/provider/SHA5";
+      stub_name = "sha512_implCompressMB";
+      stub_addr = StubRoutines::sha512_implCompressMB();
+      long_state = true;
+    }
+    break;
+  default:
+    fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
+  }
+  if (klass_SHA_name != NULL) {
+    // get the DigestBase klass and use it to look up the selected SHA klass
+    const TypeInstPtr* tinst = _gvn.type(digestBase_obj)->isa_instptr();
+    assert(tinst != NULL, "digestBase_obj is not instance???");
+    assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
+
+    ciKlass* klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
+    assert(klass_SHA->is_loaded(), "predicate checks that this class is loaded");
+    ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
+    return inline_sha_implCompressMB(digestBase_obj, instklass_SHA, long_state, stub_addr, stub_name, src_start, ofs, limit);
+  }
+  return false;
+}
+//------------------------------inline_sha_implCompressMB-----------------------
+bool LibraryCallKit::inline_sha_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass_SHA,
+                                               bool long_state, address stubAddr, const char *stubName,
+                                               Node* src_start, Node* ofs, Node* limit) {
+  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_SHA);
+  const TypeOopPtr* xtype = aklass->as_instance_type();
+  Node* sha_obj = new (C) CheckCastPPNode(control(), digestBase_obj, xtype); // cast receiver to instklass_SHA's instance type
+  sha_obj = _gvn.transform(sha_obj);
+
+  Node* state;
+  if (long_state) { // long_state selects the "[J" state field (SHA5) over the "[I" one
+    state = get_state_from_sha5_object(sha_obj);
+  } else {
+    state = get_state_from_sha_object(sha_obj);
+  }
+  if (state == NULL) return false;
+
+  // Call the multi-block stub as a leaf runtime call with (src_start, state, ofs, limit).
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                 OptoRuntime::digestBase_implCompressMB_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, state, ofs, limit);
+  // the stub's int result (ofs) becomes the result of the intrinsic
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+
+  return true;
+}
+
+//------------------------------get_state_from_sha_object-----------------------
+Node * LibraryCallKit::get_state_from_sha_object(Node *sha_object) {
+  Node* sha_state = load_field_from_object(sha_object, "state", "[I", /*is_exact*/ false);
+  assert (sha_state != NULL, "wrong version of sun.security.provider.SHA/SHA2");
+  if (sha_state == NULL) return (Node *) NULL;
+
+  // 'state' is an int[] field; return the address of its element 0
+  Node* state = array_element_address(sha_state, intcon(0), T_INT);
+  return state;
+}
+
+//------------------------------get_state_from_sha5_object-----------------------
+Node * LibraryCallKit::get_state_from_sha5_object(Node *sha_object) {
+  Node* sha_state = load_field_from_object(sha_object, "state", "[J", /*is_exact*/ false);
+  assert (sha_state != NULL, "wrong version of sun.security.provider.SHA5");
+  if (sha_state == NULL) return (Node *) NULL;
+
+  // 'state' is a long[] field; return the address of its element 0
+  Node* state = array_element_address(sha_state, intcon(0), T_LONG);
+  return state;
+}
+
+//----------------------------inline_digestBase_implCompressMB_predicate----------------------------
+// Return node representing slow path of predicate check.
+// the pseudo code we want to emulate with this predicate is:
+//    if (digestBaseObj instanceof SHA/SHA2/SHA5) do_intrinsic, else do_javapath
+// 'predicate' (0, 1 or 2) selects which single class is tested: SHA, SHA2 or SHA5.
+Node* LibraryCallKit::inline_digestBase_implCompressMB_predicate(int predicate) {
+  assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
+         "need SHA1/SHA256/SHA512 instruction support");
+  assert((uint)predicate < 3, "sanity");
+
+  // The receiver was checked for NULL already.
+  Node* digestBaseObj = argument(0);
+
+  // get DigestBase klass for instanceOf check
+  const TypeInstPtr* tinst = _gvn.type(digestBaseObj)->isa_instptr();
+  assert(tinst != NULL, "digestBaseObj is null");
+  assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
+
+  const char* klass_SHA_name = NULL;
+  switch (predicate) {
+  case 0:
+    if (UseSHA1Intrinsics) {
+      // we want to do an instanceof comparison against the SHA class
+      klass_SHA_name = "sun/security/provider/SHA";
+    }
+    break;
+  case 1:
+    if (UseSHA256Intrinsics) {
+      // we want to do an instanceof comparison against the SHA2 class
+      klass_SHA_name = "sun/security/provider/SHA2";
+    }
+    break;
+  case 2:
+    if (UseSHA512Intrinsics) {
+      // we want to do an instanceof comparison against the SHA5 class
+      klass_SHA_name = "sun/security/provider/SHA5";
+    }
+    break;
+  default:
+    fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
+  }
+
+  ciKlass* klass_SHA = NULL;
+  if (klass_SHA_name != NULL) {
+    klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
+  }
+  if ((klass_SHA == NULL) || !klass_SHA->is_loaded()) {
+    // this predicate's class is disabled (flag off) or not loaded: never take its intrinsic fast path
+    Node* ctrl = control();
+    set_control(top()); // no intrinsic path
+    return ctrl;
+  }
+  ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
+
+  Node* instofSHA = gen_instanceof(digestBaseObj, makecon(TypeKlassPtr::make(instklass_SHA)));
+  Node* cmp_instof = _gvn.transform(new (C) CmpINode(instofSHA, intcon(1)));
+  Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne)); // true when instanceof result != 1
+  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
+
+  return instof_false;  // even if it is NULL
+}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/multnode.cpp
--- a/src/share/vm/opto/multnode.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/multnode.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -194,7 +194,9 @@
     }
   }
 
-  ProjNode* other_proj = iff->proj_out(1-_con)->as_Proj();
+  ProjNode* other_proj = iff->proj_out(1-_con);
+  if (other_proj == NULL) // Should never happen, but make Parfait happy.
+      return false;
   if (other_proj->is_uncommon_trap_proj(reason)) {
     assert(reason == Deoptimization::Reason_none ||
            Compile::current()->is_predicate_opaq(iff->in(1)->in(1)), "should be on the list");
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/output.cpp
--- a/src/share/vm/opto/output.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/output.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -783,9 +783,10 @@
     // grow downwards in all implementations.
     // (If, on some machine, the interpreter's Java locals or stack
     // were to grow upwards, the embedded doubles would be word-swapped.)
-    jint   *dp = (jint*)&d;
-    array->append(new ConstantIntValue(dp[1]));
-    array->append(new ConstantIntValue(dp[0]));
+    jlong_accessor acc;
+    acc.long_value = jlong_cast(d);
+    array->append(new ConstantIntValue(acc.words[1]));
+    array->append(new ConstantIntValue(acc.words[0]));
 #endif
     break;
   }
@@ -802,9 +803,10 @@
     // grow downwards in all implementations.
     // (If, on some machine, the interpreter's Java locals or stack
     // were to grow upwards, the embedded doubles would be word-swapped.)
-    jint *dp = (jint*)&d;
-    array->append(new ConstantIntValue(dp[1]));
-    array->append(new ConstantIntValue(dp[0]));
+    jlong_accessor acc;
+    acc.long_value = d;
+    array->append(new ConstantIntValue(acc.words[1]));
+    array->append(new ConstantIntValue(acc.words[0]));
 #endif
     break;
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/parse1.cpp
--- a/src/share/vm/opto/parse1.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/parse1.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -565,12 +565,13 @@
     set_map(entry_map);
     do_method_entry();
   }
-  if (depth() == 1) {
+
+  if (depth() == 1 && !failing()) {
     // Add check to deoptimize the nmethod if RTM state was changed
     rtm_deopt();
   }
 
-  // Check for bailouts during method entry.
+  // Check for bailouts during method entry or RTM state check setup.
   if (failing()) {
     if (log)  log->done("parse");
     C->set_default_node_notes(caller_nn);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/runtime.cpp
--- a/src/share/vm/opto/runtime.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/runtime.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -898,6 +898,50 @@
   return TypeFunc::make(domain, range);
 }
 
+/*
+ * Call type used for the stubs behind: void implCompress(byte[] buf, int ofs)
+ */
+const TypeFunc* OptoRuntime::sha_implCompress_Type() {
+  // create input type (domain): two non-null pointers
+  int num_args = 2;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL; // buf
+  fields[argp++] = TypePtr::NOTNULL; // state
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result type needed: the range tuple stops at TypeFunc::Parms
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
+/*
+ * Call type used for the stubs behind: int implCompressMultiBlock(byte[] b, int ofs, int limit)
+ */
+const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type() {
+  // create input type (domain): two non-null pointers followed by two ints
+  int num_args = 4;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL; // buf
+  fields[argp++] = TypePtr::NOTNULL; // state
+  fields[argp++] = TypeInt::INT;     // ofs
+  fields[argp++] = TypeInt::INT;     // limit
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // returning ofs (int): the range tuple has one int slot
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeInt::INT; // ofs
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
+  return TypeFunc::make(domain, range);
+}
+
 //------------- Interpreter state access for on stack replacement
 const TypeFunc* OptoRuntime::osr_end_Type() {
   // create input type (domain)
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/runtime.hpp
--- a/src/share/vm/opto/runtime.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/runtime.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -300,6 +300,9 @@
   static const TypeFunc* aescrypt_block_Type();
   static const TypeFunc* cipherBlockChaining_aescrypt_Type();
 
+  static const TypeFunc* sha_implCompress_Type();
+  static const TypeFunc* digestBase_implCompressMB_Type();
+
   static const TypeFunc* updateBytesCRC32_Type();
 
   // leaf on stack replacement interpreter accessor types
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/opto/superword.cpp
--- a/src/share/vm/opto/superword.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/opto/superword.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1374,6 +1374,20 @@
       if (n->is_Load()) {
         Node* ctl = n->in(MemNode::Control);
         Node* mem = first->in(MemNode::Memory);
+        SWPointer p1(n->as_Mem(), this);
+        // Identify the memory dependency for the new loadVector node by
+        // walking up through memory chain.
+        // This is done to give flexibility to the new loadVector node so that
+        // it can move above independent storeVector nodes.
+        while (mem->is_StoreVector()) {
+          SWPointer p2(mem->as_Mem(), this);
+          int cmp = p1.cmp(p2);
+          if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
+            mem = mem->in(MemNode::Memory);
+          } else {
+            break; // dependent memory
+          }
+        }
         Node* adr = low_adr->in(MemNode::Address);
         const TypePtr* atyp = n->adr_type();
         vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/precompiled/precompiled.hpp
--- a/src/share/vm/precompiled/precompiled.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/precompiled/precompiled.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -193,11 +193,13 @@
 # include "runtime/mutexLocker.hpp"
 # include "runtime/objectMonitor.hpp"
 # include "runtime/orderAccess.hpp"
+# include "runtime/orderAccess.inline.hpp"
 # include "runtime/os.hpp"
 # include "runtime/osThread.hpp"
 # include "runtime/perfData.hpp"
 # include "runtime/perfMemory.hpp"
 # include "runtime/prefetch.hpp"
+# include "runtime/prefetch.inline.hpp"
 # include "runtime/reflection.hpp"
 # include "runtime/reflectionUtils.hpp"
 # include "runtime/registerMap.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/forte.cpp
--- a/src/share/vm/prims/forte.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/forte.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,7 +32,7 @@
 #include "oops/oop.inline2.hpp"
 #include "prims/forte.hpp"
 #include "runtime/javaCalls.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "runtime/vframeArray.hpp"
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jni.cpp
--- a/src/share/vm/prims/jni.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jni.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -67,6 +67,7 @@
 #include "runtime/java.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/jfieldIDWorkaround.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/reflection.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/signature.hpp"
@@ -5063,6 +5064,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #endif
+#include "memory/guardedMemory.hpp"
 #include "utilities/quickSort.hpp"
 #include "utilities/ostream.hpp"
 #if INCLUDE_VM_STRUCTS
@@ -5081,9 +5083,11 @@
 void TestMetachunk_test();
 void TestVirtualSpaceNode_test();
 void TestNewSize_test();
+void TestKlass_test();
 #if INCLUDE_ALL_GCS
 void TestOldFreeSpaceCalculation_test();
 void TestG1BiasedArray_test();
+void TestBufferingOopClosure_test();
 void TestCodeCacheRemSet_test();
 #endif
 
@@ -5101,9 +5105,11 @@
     run_unit_test(arrayOopDesc::test_max_array_length());
     run_unit_test(CollectedHeap::test_is_in());
     run_unit_test(QuickSort::test_quick_sort());
+    run_unit_test(GuardedMemory::test_guarded_memory());
     run_unit_test(AltHashing::test_alt_hash());
     run_unit_test(test_loggc_filename());
     run_unit_test(TestNewSize_test());
+    run_unit_test(TestKlass_test());
 #if INCLUDE_VM_STRUCTS
     run_unit_test(VMStructs::test());
 #endif
@@ -5111,6 +5117,7 @@
     run_unit_test(TestOldFreeSpaceCalculation_test());
     run_unit_test(TestG1BiasedArray_test());
     run_unit_test(HeapRegionRemSet::test_prt());
+    run_unit_test(TestBufferingOopClosure_test());
     run_unit_test(TestCodeCacheRemSet_test());
 #endif
     tty->print_cr("All internal VM tests passed");
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jniCheck.cpp
--- a/src/share/vm/prims/jniCheck.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jniCheck.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "memory/guardedMemory.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/symbol.hpp"
@@ -35,7 +36,7 @@
 #include "runtime/handles.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/jfieldIDWorkaround.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #ifdef TARGET_ARCH_x86
 # include "jniTypes_x86.hpp"
 #endif
@@ -323,6 +324,74 @@
   }
 }
 
+/*
+ * Copy and wrap array elements for bounds checking, remembering the original
+ * elements as the tag. length() is widened first: int << n can overflow.
+ */
+static void* check_jni_wrap_copy_array(JavaThread* thr, jarray array,
+    void* orig_elements) {
+  void* result;
+  IN_VM(
+    oop a = JNIHandles::resolve_non_null(array);
+    size_t len = (size_t) arrayOop(a)->length() <<
+        TypeArrayKlass::cast(a->klass())->log2_element_size();
+    result = GuardedMemory::wrap_copy(orig_elements, len, orig_elements);
+  )
+  return result;
+}
+
+// Verify the guards around carray and return the original elements stored as
+// its tag (user size via rsz, if non-NULL). Failures are reported as fn_name.
+static void* check_wrapped_array(JavaThread* thr, const char* fn_name,
+    void* obj, void* carray, size_t* rsz) {
+  if (carray == NULL) {
+    tty->print_cr("%s: elements vector NULL. array: " PTR_FORMAT, fn_name, p2i(obj));
+    NativeReportJNIFatalError(thr, "Elements vector NULL");
+  }
+  GuardedMemory guarded(carray);
+  void* orig_result = guarded.get_tag();
+  if (!guarded.verify_guards()) {
+    tty->print_cr("%s: release array failed bounds check, incorrect pointer "
+        "returned ? array: " PTR_FORMAT " carray: " PTR_FORMAT,
+        fn_name, p2i(obj), p2i(carray));
+    guarded.print_on(tty);
+    NativeReportJNIFatalError(thr, err_msg("%s: failed bounds check", fn_name));
+  }
+  if (orig_result == NULL) {
+    tty->print_cr("%s: unrecognized elements. array: " PTR_FORMAT " carray: "
+        PTR_FORMAT, fn_name, p2i(obj), p2i(carray));
+    guarded.print_on(tty);
+    NativeReportJNIFatalError(thr, err_msg("%s: unrecognized elements", fn_name));
+  }
+  if (rsz != NULL) {
+    *rsz = guarded.get_user_size();
+  }
+  return orig_result;
+}
+
+// Validate the wrapped copy, then apply the release mode to it: 0 copies back
+// and frees, JNI_COMMIT only copies back, JNI_ABORT only frees.
+static void* check_wrapped_array_release(JavaThread* thr, const char* fn_name,
+    void* obj, void* carray, jint mode) {
+  size_t user_size;
+  void* original = check_wrapped_array(thr, fn_name, obj, carray, &user_size);
+  if (mode != 0 && mode != JNI_COMMIT && mode != JNI_ABORT) {
+    tty->print_cr("%s: Unrecognized mode %i releasing array "
+        PTR_FORMAT " elements " PTR_FORMAT, fn_name, mode, p2i(obj), p2i(carray));
+    NativeReportJNIFatalError(thr, "Unrecognized array release mode");
+    return original;
+  }
+  if (mode != JNI_ABORT) {
+    // 0 and JNI_COMMIT publish the copy's contents to the original elements.
+    memcpy(original, carray, user_size);
+  }
+  if (mode != JNI_COMMIT) {
+    // 0 and JNI_ABORT are finished with the guarded copy.
+    GuardedMemory::free_copy(carray);
+  }
+  return original;
+}
+
 oop jniCheck::validate_handle(JavaThread* thr, jobject obj) {
   if (JNIHandles::is_frame_handle(thr, obj) ||
       JNIHandles::is_local_handle(thr, obj) ||
@@ -1314,7 +1383,7 @@
 JNI_END
 
 // Arbitrary (but well-known) tag
-const jint STRING_TAG = 0x47114711;
+const void* STRING_TAG = (void*)0x47114711;
 
 JNI_ENTRY_CHECKED(const jchar *,
   checked_jni_GetStringChars(JNIEnv *env,
@@ -1324,21 +1393,22 @@
     IN_VM(
       checkString(thr, str);
     )
-    jchar* newResult = NULL;
+    jchar* new_result = NULL;
     const jchar *result = UNCHECKED()->GetStringChars(env,str,isCopy);
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringChars didn't return a copy as expected");
     if (result != NULL) {
       size_t len = UNCHECKED()->GetStringLength(env,str) + 1; // + 1 for NULL termination
-      jint* tagLocation = (jint*) AllocateHeap(len * sizeof(jchar) + sizeof(jint), mtInternal);
-      *tagLocation = STRING_TAG;
-      newResult = (jchar*) (tagLocation + 1);
-      memcpy(newResult, result, len * sizeof(jchar));
+      len *= sizeof(jchar);
+      new_result = (jchar*) GuardedMemory::wrap_copy(result, len, STRING_TAG);
+      if (new_result == NULL) {
+        vm_exit_out_of_memory(len, OOM_MALLOC_ERROR, "checked_jni_GetStringChars");
+      }
       // Avoiding call to UNCHECKED()->ReleaseStringChars() since that will fire unexpected dtrace probes
       // Note that the dtrace arguments for the allocated memory will not match up with this solution.
       FreeHeap((char*)result);
     }
     functionExit(env);
-    return newResult;
+    return new_result;
 JNI_END
 
 JNI_ENTRY_CHECKED(void,
@@ -1354,11 +1424,23 @@
        UNCHECKED()->ReleaseStringChars(env,str,chars);
     }
     else {
-       jint* tagLocation = ((jint*) chars) - 1;
-       if (*tagLocation != STRING_TAG) {
-          NativeReportJNIFatalError(thr, "ReleaseStringChars called on something not allocated by GetStringChars");
-       }
-       UNCHECKED()->ReleaseStringChars(env,str,(const jchar*)tagLocation);
+      GuardedMemory guarded((void*)chars);
+      if (!guarded.verify_guards()) {
+        tty->print_cr("ReleaseStringChars: release chars failed bounds check. "
+            "string: " PTR_FORMAT " chars: " PTR_FORMAT, p2i(str), p2i(chars));
+        guarded.print_on(tty);
+        NativeReportJNIFatalError(thr, "ReleaseStringChars: "
+            "release chars failed bounds check.");
+      }
+      if (guarded.get_tag() != STRING_TAG) {
+        tty->print_cr("ReleaseStringChars: called on something not allocated "
+            "by GetStringChars. string: " PTR_FORMAT " chars: " PTR_FORMAT,
+            p2i(str), p2i(chars));
+        NativeReportJNIFatalError(thr, "ReleaseStringChars called on something "
+            "not allocated by GetStringChars");
+      }
+       UNCHECKED()->ReleaseStringChars(env, str,
+           (const jchar*) guarded.release_for_freeing());
     }
     functionExit(env);
 JNI_END
@@ -1385,7 +1467,7 @@
 JNI_END
 
 // Arbitrary (but well-known) tag - different than GetStringChars
-const jint STRING_UTF_TAG = 0x48124812;
+const void* STRING_UTF_TAG = (void*) 0x48124812;
 
 JNI_ENTRY_CHECKED(const char *,
   checked_jni_GetStringUTFChars(JNIEnv *env,
@@ -1395,21 +1477,21 @@
     IN_VM(
       checkString(thr, str);
     )
-    char* newResult = NULL;
+    char* new_result = NULL;
     const char *result = UNCHECKED()->GetStringUTFChars(env,str,isCopy);
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringUTFChars didn't return a copy as expected");
     if (result != NULL) {
       size_t len = strlen(result) + 1; // + 1 for NULL termination
-      jint* tagLocation = (jint*) AllocateHeap(len + sizeof(jint), mtInternal);
-      *tagLocation = STRING_UTF_TAG;
-      newResult = (char*) (tagLocation + 1);
-      strcpy(newResult, result);
+      new_result = (char*) GuardedMemory::wrap_copy(result, len, STRING_UTF_TAG);
+      if (new_result == NULL) {
+        vm_exit_out_of_memory(len, OOM_MALLOC_ERROR, "checked_jni_GetStringUTFChars");
+      }
       // Avoiding call to UNCHECKED()->ReleaseStringUTFChars() since that will fire unexpected dtrace probes
       // Note that the dtrace arguments for the allocated memory will not match up with this solution.
       FreeHeap((char*)result, mtInternal);
     }
     functionExit(env);
-    return newResult;
+    return new_result;
 JNI_END
 
 JNI_ENTRY_CHECKED(void,
@@ -1425,11 +1507,23 @@
        UNCHECKED()->ReleaseStringUTFChars(env,str,chars);
     }
     else {
-       jint* tagLocation = ((jint*) chars) - 1;
-       if (*tagLocation != STRING_UTF_TAG) {
-          NativeReportJNIFatalError(thr, "ReleaseStringUTFChars called on something not allocated by GetStringUTFChars");
-       }
-       UNCHECKED()->ReleaseStringUTFChars(env,str,(const char*)tagLocation);
+      GuardedMemory guarded((void*)chars);
+      if (!guarded.verify_guards()) {
+        tty->print_cr("ReleaseStringUTFChars: release chars failed bounds check. "
+            "string: " PTR_FORMAT " chars: " PTR_FORMAT, p2i(str), p2i(chars));
+        guarded.print_on(tty);
+        NativeReportJNIFatalError(thr, "ReleaseStringUTFChars: "
+            "release chars failed bounds check.");
+      }
+      if (guarded.get_tag() != STRING_UTF_TAG) {
+        tty->print_cr("ReleaseStringUTFChars: called on something not "
+            "allocated by GetStringUTFChars. string: " PTR_FORMAT " chars: "
+            PTR_FORMAT, p2i(str), p2i(chars));
+        NativeReportJNIFatalError(thr, "ReleaseStringUTFChars "
+            "called on something not allocated by GetStringUTFChars");
+      }
+      UNCHECKED()->ReleaseStringUTFChars(env, str,
+          (const char*) guarded.release_for_freeing());
     }
     functionExit(env);
 JNI_END
@@ -1514,6 +1608,9 @@
     ElementType *result = UNCHECKED()->Get##Result##ArrayElements(env, \
                                                                   array, \
                                                                   isCopy); \
+    if (result != NULL) { \
+      result = (ElementType *) check_jni_wrap_copy_array(thr, array, result); \
+    } \
     functionExit(env); \
     return result; \
 JNI_END
@@ -1538,12 +1635,10 @@
       check_primitive_array_type(thr, array, ElementTag); \
       ASSERT_OOPS_ALLOWED; \
       typeArrayOop a = typeArrayOop(JNIHandles::resolve_non_null(array)); \
-      /* cannot check validity of copy, unless every request is logged by
-       * checking code.  Implementation of this check is deferred until a
-       * subsequent release.
-       */ \
     ) \
-    UNCHECKED()->Release##Result##ArrayElements(env,array,elems,mode); \
+    ElementType* orig_result = (ElementType *) check_wrapped_array_release( \
+        thr, "checked_jni_Release"#Result"ArrayElements", array, elems, mode); \
+    UNCHECKED()->Release##Result##ArrayElements(env, array, orig_result, mode); \
     functionExit(env); \
 JNI_END
 
@@ -1694,6 +1789,9 @@
       check_is_primitive_array(thr, array);
     )
     void *result = UNCHECKED()->GetPrimitiveArrayCritical(env, array, isCopy);
+    if (result != NULL) {
+      result = check_jni_wrap_copy_array(thr, array, result);
+    }
     functionExit(env);
     return result;
 JNI_END
@@ -1707,10 +1805,9 @@
     IN_VM(
       check_is_primitive_array(thr, array);
     )
-    /* The Hotspot JNI code does not use the parameters, so just check the
-     * array parameter as a minor sanity check
-     */
-    UNCHECKED()->ReleasePrimitiveArrayCritical(env, array, carray, mode);
+    // Check the element array...
+    void* orig_result = check_wrapped_array_release(thr, "ReleasePrimitiveArrayCritical", array, carray, mode);
+    UNCHECKED()->ReleasePrimitiveArrayCritical(env, array, orig_result, mode);
     functionExit(env);
 JNI_END
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jvm.cpp
--- a/src/share/vm/prims/jvm.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jvm.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -51,6 +51,7 @@
 #include "runtime/java.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/jfieldIDWorkaround.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/perfData.hpp"
 #include "runtime/reflection.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jvmtiEnvBase.cpp
--- a/src/share/vm/prims/jvmtiEnvBase.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jvmtiEnvBase.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -41,6 +41,7 @@
 #include "runtime/objectMonitor.hpp"
 #include "runtime/objectMonitor.inline.hpp"
 #include "runtime/signature.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "runtime/vframe_hp.hpp"
 #include "runtime/vmThread.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jvmtiExport.cpp
--- a/src/share/vm/prims/jvmtiExport.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jvmtiExport.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -47,7 +47,7 @@
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/objectMonitor.hpp"
 #include "runtime/objectMonitor.inline.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "services/attachListener.hpp"
 #include "services/serviceUtil.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jvmtiRawMonitor.cpp
--- a/src/share/vm/prims/jvmtiRawMonitor.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jvmtiRawMonitor.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,7 +25,8 @@
 #include "precompiled.hpp"
 #include "prims/jvmtiRawMonitor.hpp"
 #include "runtime/interfaceSupport.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/orderAccess.inline.hpp"
+#include "runtime/thread.inline.hpp"
 
 GrowableArray<JvmtiRawMonitor*> *JvmtiPendingMonitors::_monitors = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<JvmtiRawMonitor*>(1,true);
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jvmtiTagMap.cpp
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -3017,7 +3017,7 @@
 
   // If there are any non-perm roots in the code cache, visit them.
   blk.set_kind(JVMTI_HEAP_REFERENCE_OTHER);
-  CodeBlobToOopClosure look_in_blobs(&blk, false);
+  CodeBlobToOopClosure look_in_blobs(&blk, !CodeBlobToOopClosure::FixRelocations);
   CodeCache::scavenge_root_nmethods_do(&look_in_blobs);
 
   return true;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/jvmtiThreadState.inline.hpp
--- a/src/share/vm/prims/jvmtiThreadState.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/jvmtiThreadState.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -27,6 +27,7 @@
 
 #include "prims/jvmtiEnvThreadState.hpp"
 #include "prims/jvmtiThreadState.hpp"
+#include "runtime/thread.inline.hpp"
 
 // JvmtiEnvThreadStateIterator implementation
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/unsafe.cpp
--- a/src/share/vm/prims/unsafe.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/unsafe.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,6 +33,8 @@
 #include "prims/jvm.h"
 #include "runtime/globals.hpp"
 #include "runtime/interfaceSupport.hpp"
+#include "runtime/prefetch.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/reflection.hpp"
 #include "runtime/synchronizer.hpp"
 #include "services/threadService.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/whitebox.cpp
--- a/src/share/vm/prims/whitebox.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/whitebox.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 
+#include "memory/metadataFactory.hpp"
 #include "memory/universe.hpp"
 #include "oops/oop.inline.hpp"
 
@@ -36,6 +37,7 @@
 #include "runtime/arguments.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/os.hpp"
+#include "utilities/array.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/exceptions.hpp"
@@ -501,6 +503,159 @@
   }
 WB_END
 
+// Read the VM flag called `name` through TAt into *value; false if unavailable.
+template <typename T>
+static bool GetVMFlag(JavaThread* thread, JNIEnv* env, jstring name, T* value, bool (*TAt)(const char*, T*)) {
+  if (name == NULL) return false;
+  ThreadToNativeFromVM ttnfv(thread);   // can't be in VM when we call JNI
+  const char* flag_name = env->GetStringUTFChars(name, NULL);
+  if (flag_name == NULL) return false;  // JNI could not produce the UTF-8 name
+  bool result = (*TAt)(flag_name, value);
+  env->ReleaseStringUTFChars(name, flag_name);
+  return result;
+}
+
+// Write *value to the VM flag called `name` through TAtPut (origin INTERNAL).
+template <typename T>
+static bool SetVMFlag(JavaThread* thread, JNIEnv* env, jstring name, T* value, bool (*TAtPut)(const char*, T*, Flag::Flags)) {
+  if (name == NULL) return false;
+  ThreadToNativeFromVM ttnfv(thread);   // can't be in VM when we call JNI
+  const char* flag_name = env->GetStringUTFChars(name, NULL);
+  if (flag_name == NULL) return false;  // JNI could not produce the UTF-8 name
+  bool result = (*TAtPut)(flag_name, value, Flag::INTERNAL);
+  env->ReleaseStringUTFChars(name, flag_name);
+  return result;
+}
+
+// Box `value` through the static valueOf method (signature sig) of class name.
+template <typename T>
+static jobject box(JavaThread* thread, JNIEnv* env, Symbol* name, Symbol* sig, T value) {
+  ResourceMark rm(thread);
+  jclass box_class = env->FindClass(name->as_C_string());
+  CHECK_JNI_EXCEPTION_(env, NULL);
+  const char* value_of = vmSymbols::valueOf_name()->as_C_string();
+  jmethodID factory = env->GetStaticMethodID(box_class, value_of, sig->as_C_string());
+  CHECK_JNI_EXCEPTION_(env, NULL);
+  jobject boxed = env->CallStaticObjectMethod(box_class, factory, value);
+  CHECK_JNI_EXCEPTION_(env, NULL);
+  return boxed;
+}
+
+static jobject booleanBox(JavaThread* thread, JNIEnv* env, jboolean value) {  // box() with the java.lang.Boolean class and valueOf signature symbols
+  return box(thread, env, vmSymbols::java_lang_Boolean(), vmSymbols::Boolean_valueOf_signature(), value);
+}
+static jobject integerBox(JavaThread* thread, JNIEnv* env, jint value) {  // box() with the java.lang.Integer class and valueOf signature symbols
+  return box(thread, env, vmSymbols::java_lang_Integer(), vmSymbols::Integer_valueOf_signature(), value);
+}
+static jobject longBox(JavaThread* thread, JNIEnv* env, jlong value) {  // box() with the java.lang.Long class and valueOf signature symbols
+  return box(thread, env, vmSymbols::java_lang_Long(), vmSymbols::Long_valueOf_signature(), value);
+}
+/* static jobject floatBox(JavaThread* thread, JNIEnv* env, jfloat value) {
+  return box(thread, env, vmSymbols::java_lang_Float(), vmSymbols::Float_valueOf_signature(), value);
+}*/
+static jobject doubleBox(JavaThread* thread, JNIEnv* env, jdouble value) {  // box() with the java.lang.Double class and valueOf signature symbols
+  return box(thread, env, vmSymbols::java_lang_Double(), vmSymbols::Double_valueOf_signature(), value);
+}
+
+WB_ENTRY(jobject, WB_GetBooleanVMFlag(JNIEnv* env, jobject o, jstring name))
+  bool flag_value;
+  if (!GetVMFlag <bool> (thread, env, name, &flag_value, &CommandLineFlags::boolAt)) {
+    return NULL;  // flag lookup failed
+  }
+  ThreadToNativeFromVM ttnfv(thread);   // boxing goes through JNI, so leave the VM
+  return booleanBox(thread, env, flag_value);
+WB_END
+
+WB_ENTRY(jobject, WB_GetIntxVMFlag(JNIEnv* env, jobject o, jstring name))
+  intx flag_value;
+  if (!GetVMFlag <intx> (thread, env, name, &flag_value, &CommandLineFlags::intxAt)) {
+    return NULL;  // flag lookup failed
+  }
+  ThreadToNativeFromVM ttnfv(thread);   // boxing goes through JNI, so leave the VM
+  return longBox(thread, env, flag_value);
+WB_END
+
+WB_ENTRY(jobject, WB_GetUintxVMFlag(JNIEnv* env, jobject o, jstring name))
+  uintx flag_value;
+  if (!GetVMFlag <uintx> (thread, env, name, &flag_value, &CommandLineFlags::uintxAt)) {
+    return NULL;  // flag lookup failed
+  }
+  ThreadToNativeFromVM ttnfv(thread);   // boxing goes through JNI, so leave the VM
+  return longBox(thread, env, flag_value);
+WB_END
+
+WB_ENTRY(jobject, WB_GetUint64VMFlag(JNIEnv* env, jobject o, jstring name))
+  uint64_t flag_value;
+  if (!GetVMFlag <uint64_t> (thread, env, name, &flag_value, &CommandLineFlags::uint64_tAt)) {
+    return NULL;  // flag lookup failed
+  }
+  ThreadToNativeFromVM ttnfv(thread);   // boxing goes through JNI, so leave the VM
+  return longBox(thread, env, flag_value);
+WB_END
+
+WB_ENTRY(jobject, WB_GetDoubleVMFlag(JNIEnv* env, jobject o, jstring name))
+  double flag_value;
+  if (!GetVMFlag <double> (thread, env, name, &flag_value, &CommandLineFlags::doubleAt)) {
+    return NULL;  // flag lookup failed
+  }
+  ThreadToNativeFromVM ttnfv(thread);   // boxing goes through JNI, so leave the VM
+  return doubleBox(thread, env, flag_value);
+WB_END
+
+WB_ENTRY(jstring, WB_GetStringVMFlag(JNIEnv* env, jobject o, jstring name))
+  ccstr flag_value;
+  if (!GetVMFlag <ccstr> (thread, env, name, &flag_value, &CommandLineFlags::ccstrAt)) {
+    return NULL;  // flag lookup failed
+  }
+  ThreadToNativeFromVM ttnfv(thread);   // NewStringUTF is JNI, so leave the VM first
+  jstring java_value = env->NewStringUTF(flag_value);
+  CHECK_JNI_EXCEPTION_(env, NULL);
+  return java_value;
+WB_END
+
+WB_ENTRY(void, WB_SetBooleanVMFlag(JNIEnv* env, jobject o, jstring name, jboolean value))
+  bool flag_value = (value == JNI_TRUE);  // only JNI_TRUE maps to true
+  SetVMFlag <bool> (thread, env, name, &flag_value, &CommandLineFlags::boolAtPut);
+WB_END
+
+WB_ENTRY(void, WB_SetIntxVMFlag(JNIEnv* env, jobject o, jstring name, jlong value))
+  intx flag_value = (intx) value;  // jlong converted to the flag's intx type
+  SetVMFlag <intx> (thread, env, name, &flag_value, &CommandLineFlags::intxAtPut);
+WB_END
+
+WB_ENTRY(void, WB_SetUintxVMFlag(JNIEnv* env, jobject o, jstring name, jlong value))
+  uintx flag_value = (uintx) value;  // jlong converted to the flag's uintx type
+  SetVMFlag <uintx> (thread, env, name, &flag_value, &CommandLineFlags::uintxAtPut);
+WB_END
+
+WB_ENTRY(void, WB_SetUint64VMFlag(JNIEnv* env, jobject o, jstring name, jlong value))
+  uint64_t flag_value = (uint64_t) value;  // jlong converted to the flag's uint64_t type
+  SetVMFlag <uint64_t> (thread, env, name, &flag_value, &CommandLineFlags::uint64_tAtPut);
+WB_END
+
+WB_ENTRY(void, WB_SetDoubleVMFlag(JNIEnv* env, jobject o, jstring name, jdouble value))
+  double flag_value = value;  // local copy whose address is handed to SetVMFlag
+  SetVMFlag <double> (thread, env, name, &flag_value, &CommandLineFlags::doubleAtPut);
+WB_END
+
+WB_ENTRY(void, WB_SetStringVMFlag(JNIEnv* env, jobject o, jstring name, jstring value))
+  ThreadToNativeFromVM ttnfv(thread);   // can't be in VM when we call JNI
+  const char* ccstrValue = (value == NULL) ? NULL : env->GetStringUTFChars(value, NULL);  // a NULL jstring is passed on as a NULL C string
+  ccstr ccstrResult = ccstrValue;  // its address is handed to ccstrAtPut through SetVMFlag
+  bool needFree;  // set from SetVMFlag's return value below
+  {
+    ThreadInVMfromNative ttvfn(thread); // back to VM
+    needFree = SetVMFlag <ccstr> (thread, env, name, &ccstrResult, &CommandLineFlags::ccstrAtPut);
+  }
+  if (value != NULL) {
+    env->ReleaseStringUTFChars(value, ccstrValue);  // pairs with the GetStringUTFChars above
+  }
+  if (needFree) {
+    FREE_C_HEAP_ARRAY(char, ccstrResult, mtInternal);  // NOTE(review): presumably ccstrAtPut left the previous flag value in ccstrResult -- confirm
+  }
+WB_END
+
+
 WB_ENTRY(jboolean, WB_IsInStringTable(JNIEnv* env, jobject o, jstring javaString))
   ResourceMark rm(THREAD);
   int len;
@@ -559,11 +714,7 @@
     return result;
   }
 
-  clazz = env->FindClass(vmSymbols::java_lang_Integer()->as_C_string());
-  CHECK_JNI_EXCEPTION_(env, NULL);
-  jmethodID constructor = env->GetMethodID(clazz, vmSymbols::object_initializer_name()->as_C_string(), vmSymbols::int_void_signature()->as_C_string());
-  CHECK_JNI_EXCEPTION_(env, NULL);
-  jobject obj = env->NewObject(clazz, constructor, code->comp_level());
+  jobject obj = integerBox(thread, env, code->comp_level());
   CHECK_JNI_EXCEPTION_(env, NULL);
   env->SetObjectArrayElement(result, 0, obj);
 
@@ -576,6 +727,35 @@
 WB_END
 
 
+int WhiteBox::array_bytes_to_length(size_t bytes) {  // forwards to Array<u1>::bytes_to_length
+  return Array<u1>::bytes_to_length(bytes);
+}
+
+// Allocate a writeable Array<u1> sized for `size` bytes from the loader's
+// ClassLoaderData (the null loader's when class_loader resolves to NULL).
+WB_ENTRY(jlong, WB_AllocateMetaspace(JNIEnv* env, jobject wb, jobject class_loader, jlong size))
+  if (size < 0) {
+    THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
+        err_msg("WB_AllocateMetaspace: size is negative: " JLONG_FORMAT, size));
+  }
+  oop loader = JNIHandles::resolve(class_loader);
+  ClassLoaderData* loader_data = (loader == NULL)
+      ? ClassLoaderData::the_null_class_loader_data()
+      : java_lang_ClassLoader::loader_data(loader);
+
+  int length = WhiteBox::array_bytes_to_length((size_t)size);
+  return (jlong)(uintptr_t)MetadataFactory::new_writeable_array<u1>(loader_data, length, thread);
+WB_END
+
+// Free the Array<u1> at `addr` via the loader's ClassLoaderData; `size` is unused.
+WB_ENTRY(void, WB_FreeMetaspace(JNIEnv* env, jobject wb, jobject class_loader, jlong addr, jlong size))
+  oop loader = JNIHandles::resolve(class_loader);
+  ClassLoaderData* loader_data = (loader == NULL)
+      ? ClassLoaderData::the_null_class_loader_data()
+      : java_lang_ClassLoader::loader_data(loader);
+  MetadataFactory::free_array(loader_data, (Array<u1>*)(uintptr_t)addr);
+WB_END
+
 //Some convenience methods to deal with objects from java
 int WhiteBox::offset_for_field(const char* field_name, oop object,
     Symbol* signature_symbol) {
@@ -684,9 +864,32 @@
       CC"(Ljava/lang/reflect/Executable;II)Z",        (void*)&WB_EnqueueMethodForCompilation},
   {CC"clearMethodState",
       CC"(Ljava/lang/reflect/Executable;)V",          (void*)&WB_ClearMethodState},
-  {CC"isInStringTable",   CC"(Ljava/lang/String;)Z",  (void*)&WB_IsInStringTable  },
+  {CC"setBooleanVMFlag",   CC"(Ljava/lang/String;Z)V",(void*)&WB_SetBooleanVMFlag},
+  {CC"setIntxVMFlag",      CC"(Ljava/lang/String;J)V",(void*)&WB_SetIntxVMFlag},
+  {CC"setUintxVMFlag",     CC"(Ljava/lang/String;J)V",(void*)&WB_SetUintxVMFlag},
+  {CC"setUint64VMFlag",    CC"(Ljava/lang/String;J)V",(void*)&WB_SetUint64VMFlag},
+  {CC"setDoubleVMFlag",    CC"(Ljava/lang/String;D)V",(void*)&WB_SetDoubleVMFlag},
+  {CC"setStringVMFlag",    CC"(Ljava/lang/String;Ljava/lang/String;)V",
+                                                      (void*)&WB_SetStringVMFlag},
+  {CC"getBooleanVMFlag",   CC"(Ljava/lang/String;)Ljava/lang/Boolean;",
+                                                      (void*)&WB_GetBooleanVMFlag},
+  {CC"getIntxVMFlag",      CC"(Ljava/lang/String;)Ljava/lang/Long;",
+                                                      (void*)&WB_GetIntxVMFlag},
+  {CC"getUintxVMFlag",     CC"(Ljava/lang/String;)Ljava/lang/Long;",
+                                                      (void*)&WB_GetUintxVMFlag},
+  {CC"getUint64VMFlag",    CC"(Ljava/lang/String;)Ljava/lang/Long;",
+                                                      (void*)&WB_GetUint64VMFlag},
+  {CC"getDoubleVMFlag",    CC"(Ljava/lang/String;)Ljava/lang/Double;",
+                                                      (void*)&WB_GetDoubleVMFlag},
+  {CC"getStringVMFlag",    CC"(Ljava/lang/String;)Ljava/lang/String;",
+                                                      (void*)&WB_GetStringVMFlag},
+  {CC"isInStringTable",    CC"(Ljava/lang/String;)Z", (void*)&WB_IsInStringTable  },
   {CC"fullGC",   CC"()V",                             (void*)&WB_FullGC },
   {CC"readReservedMemory", CC"()V",                   (void*)&WB_ReadReservedMemory },
+  {CC"allocateMetaspace",
+     CC"(Ljava/lang/ClassLoader;J)J",                 (void*)&WB_AllocateMetaspace },
+  {CC"freeMetaspace",
+     CC"(Ljava/lang/ClassLoader;JJ)V",                (void*)&WB_FreeMetaspace },
   {CC"getCPUFeatures",     CC"()Ljava/lang/String;",  (void*)&WB_GetCPUFeatures     },
   {CC"getNMethod",         CC"(Ljava/lang/reflect/Executable;Z)[Ljava/lang/Object;",
                                                       (void*)&WB_GetNMethod         },
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/prims/whitebox.hpp
--- a/src/share/vm/prims/whitebox.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/prims/whitebox.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -62,6 +62,8 @@
     Symbol* signature_symbol);
   static const char* lookup_jstring(const char* field_name, oop object);
   static bool lookup_bool(const char* field_name, oop object);
+
+  static int array_bytes_to_length(size_t bytes);
 };
 
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/arguments.cpp
--- a/src/share/vm/runtime/arguments.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/arguments.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1398,10 +1398,26 @@
                 (int)ObjectAlignmentInBytes, os::vm_page_size());
     return false;
   }
+  if(SurvivorAlignmentInBytes == 0) {
+    SurvivorAlignmentInBytes = ObjectAlignmentInBytes;
+  } else {
+    if (!is_power_of_2(SurvivorAlignmentInBytes)) {
+      jio_fprintf(defaultStream::error_stream(),
+            "error: SurvivorAlignmentInBytes=%d must be power of 2\n",
+            (int)SurvivorAlignmentInBytes);
+      return false;
+    }
+    if (SurvivorAlignmentInBytes < ObjectAlignmentInBytes) {
+      jio_fprintf(defaultStream::error_stream(),
+          "error: SurvivorAlignmentInBytes=%d must be greater than ObjectAlignmentInBytes=%d \n",
+          (int)SurvivorAlignmentInBytes, (int)ObjectAlignmentInBytes);
+      return false;
+    }
+  }
   return true;
 }
 
-uintx Arguments::max_heap_for_compressed_oops() {
+size_t Arguments::max_heap_for_compressed_oops() {
   // Avoid sign flip.
   assert(OopEncodingHeapMax > (uint64_t)os::vm_page_size(), "Unusual page size");
   // We need to fit both the NULL page and the heap into the memory budget, while
@@ -1505,8 +1521,10 @@
     heap_alignment = G1CollectedHeap::conservative_max_heap_alignment();
   }
 #endif // INCLUDE_ALL_GCS
-  _conservative_max_heap_alignment = MAX3(heap_alignment, os::max_page_size(),
-    CollectorPolicy::compute_heap_alignment());
+  _conservative_max_heap_alignment = MAX4(heap_alignment,
+                                          (size_t)os::vm_allocation_granularity(),
+                                          os::max_page_size(),
+                                          CollectorPolicy::compute_heap_alignment());
 }
 
 void Arguments::set_ergonomics_flags() {
@@ -2387,6 +2405,8 @@
     warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
   }
 
+  status &= check_vm_args_consistency_ext();
+
   return status;
 }
 
@@ -3833,18 +3853,24 @@
 }
 
 jint Arguments::adjust_after_os() {
-#if INCLUDE_ALL_GCS
-  if (UseParallelGC || UseParallelOldGC) {
-    if (UseNUMA) {
+  if (UseNUMA) {
+    if (UseParallelGC || UseParallelOldGC) {
       if (FLAG_IS_DEFAULT(MinHeapDeltaBytes)) {
-        FLAG_SET_DEFAULT(MinHeapDeltaBytes, 64*M);
+         FLAG_SET_DEFAULT(MinHeapDeltaBytes, 64*M);
       }
-      // For those collectors or operating systems (eg, Windows) that do
-      // not support full UseNUMA, we will map to UseNUMAInterleaving for now
-      UseNUMAInterleaving = true;
+    }
+    // UseNUMAInterleaving is set to ON for all collectors and
+    // platforms when UseNUMA is set to ON. NUMA-aware collectors
+    // such as the parallel collector for Linux and Solaris will
+    // interleave old gen and survivor spaces on top of NUMA
+    // allocation policy for the eden space.
+    // Non NUMA-aware collectors such as CMS, G1 and Serial-GC on
+    // all platforms and ParallelGC on Windows will interleave all
+    // of the heap spaces across NUMA nodes.
+    if (FLAG_IS_DEFAULT(UseNUMAInterleaving)) {
+      FLAG_SET_ERGO(bool, UseNUMAInterleaving, true);
     }
   }
-#endif // INCLUDE_ALL_GCS
   return JNI_OK;
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/arguments.hpp
--- a/src/share/vm/runtime/arguments.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/arguments.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -461,6 +461,7 @@
   static void check_deprecated_gc_flags();
   // Check consistecy or otherwise of VM argument settings
   static bool check_vm_args_consistency();
+  static bool check_vm_args_consistency_ext();
   // Check stack pages settings
   static bool check_stack_pages();
   // Used by os_solaris
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/arguments_ext.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/arguments_ext.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/arguments.hpp"
+
+bool Arguments::check_vm_args_consistency_ext() {  // extension hook and-ed into check_vm_args_consistency()'s status
+  return true;  // this implementation performs no additional checks
+}
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/deoptimization.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -698,7 +698,7 @@
              (iframe->interpreter_frame_expression_stack_size() == (next_mask_expression_stack_size -
                                                                     top_frame_expression_stack_adjustment))) ||
             (is_top_frame && (exec_mode == Unpack_exception) && iframe->interpreter_frame_expression_stack_size() == 0) ||
-            (is_top_frame && (exec_mode == Unpack_uncommon_trap || exec_mode == Unpack_reexecute) &&
+            (is_top_frame && (exec_mode == Unpack_uncommon_trap || exec_mode == Unpack_reexecute || el->should_reexecute()) &&
              (iframe->interpreter_frame_expression_stack_size() == mask.expression_stack_size() + cur_invoke_parameter_size))
             )) {
         ttyLocker ttyl;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/fprofiler.cpp
--- a/src/share/vm/runtime/fprofiler.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/fprofiler.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -38,6 +38,7 @@
 #include "runtime/stubCodeGenerator.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/task.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
 #include "utilities/macros.hpp"
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/frame.cpp
--- a/src/share/vm/runtime/frame.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/frame.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -900,7 +900,7 @@
 }
 
 
-void frame::oops_interpreted_do(OopClosure* f, CLDToOopClosure* cld_f,
+void frame::oops_interpreted_do(OopClosure* f, CLDClosure* cld_f,
     const RegisterMap* map, bool query_oop_map_cache) {
   assert(is_interpreted_frame(), "Not an interpreted frame");
   assert(map != NULL, "map must be set");
@@ -1140,7 +1140,7 @@
 }
 
 
-void frame::oops_do_internal(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache) {
+void frame::oops_do_internal(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache) {
 #ifndef PRODUCT
   // simulate GC crash here to dump java thread in error report
   if (CrashGCForDumpingJavaThread) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/frame.hpp
--- a/src/share/vm/runtime/frame.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/frame.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -419,19 +419,19 @@
 
   // Oops-do's
   void oops_compiled_arguments_do(Symbol* signature, bool has_receiver, bool has_appendix, const RegisterMap* reg_map, OopClosure* f);
-  void oops_interpreted_do(OopClosure* f, CLDToOopClosure* cld_f, const RegisterMap* map, bool query_oop_map_cache = true);
+  void oops_interpreted_do(OopClosure* f, CLDClosure* cld_f, const RegisterMap* map, bool query_oop_map_cache = true);
 
  private:
   void oops_interpreted_arguments_do(Symbol* signature, bool has_receiver, OopClosure* f);
 
   // Iteration of oops
-  void oops_do_internal(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache);
+  void oops_do_internal(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache);
   void oops_entry_do(OopClosure* f, const RegisterMap* map);
   void oops_code_blob_do(OopClosure* f, CodeBlobClosure* cf, const RegisterMap* map);
   int adjust_offset(Method* method, int index); // helper for above fn
  public:
   // Memory management
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map) { oops_do_internal(f, cld_f, cf, map, true); }
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map) { oops_do_internal(f, cld_f, cf, map, true); }
   void nmethods_do(CodeBlobClosure* cf);
 
   // RedefineClasses support for finding live interpreted methods on the stack
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/globals.cpp
--- a/src/share/vm/runtime/globals.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/globals.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -611,7 +611,7 @@
   e.commit();
 }
 
-bool CommandLineFlags::boolAt(char* name, size_t len, bool* value) {
+bool CommandLineFlags::boolAt(const char* name, size_t len, bool* value) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_bool()) return false;
@@ -619,7 +619,7 @@
   return true;
 }
 
-bool CommandLineFlags::boolAtPut(char* name, size_t len, bool* value, Flag::Flags origin) {
+bool CommandLineFlags::boolAtPut(const char* name, size_t len, bool* value, Flag::Flags origin) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_bool()) return false;
@@ -639,7 +639,7 @@
   faddr->set_origin(origin);
 }
 
-bool CommandLineFlags::intxAt(char* name, size_t len, intx* value) {
+bool CommandLineFlags::intxAt(const char* name, size_t len, intx* value) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_intx()) return false;
@@ -647,7 +647,7 @@
   return true;
 }
 
-bool CommandLineFlags::intxAtPut(char* name, size_t len, intx* value, Flag::Flags origin) {
+bool CommandLineFlags::intxAtPut(const char* name, size_t len, intx* value, Flag::Flags origin) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_intx()) return false;
@@ -667,7 +667,7 @@
   faddr->set_origin(origin);
 }
 
-bool CommandLineFlags::uintxAt(char* name, size_t len, uintx* value) {
+bool CommandLineFlags::uintxAt(const char* name, size_t len, uintx* value) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_uintx()) return false;
@@ -675,7 +675,7 @@
   return true;
 }
 
-bool CommandLineFlags::uintxAtPut(char* name, size_t len, uintx* value, Flag::Flags origin) {
+bool CommandLineFlags::uintxAtPut(const char* name, size_t len, uintx* value, Flag::Flags origin) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_uintx()) return false;
@@ -695,7 +695,7 @@
   faddr->set_origin(origin);
 }
 
-bool CommandLineFlags::uint64_tAt(char* name, size_t len, uint64_t* value) {
+bool CommandLineFlags::uint64_tAt(const char* name, size_t len, uint64_t* value) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_uint64_t()) return false;
@@ -703,7 +703,7 @@
   return true;
 }
 
-bool CommandLineFlags::uint64_tAtPut(char* name, size_t len, uint64_t* value, Flag::Flags origin) {
+bool CommandLineFlags::uint64_tAtPut(const char* name, size_t len, uint64_t* value, Flag::Flags origin) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_uint64_t()) return false;
@@ -723,7 +723,7 @@
   faddr->set_origin(origin);
 }
 
-bool CommandLineFlags::doubleAt(char* name, size_t len, double* value) {
+bool CommandLineFlags::doubleAt(const char* name, size_t len, double* value) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_double()) return false;
@@ -731,7 +731,7 @@
   return true;
 }
 
-bool CommandLineFlags::doubleAtPut(char* name, size_t len, double* value, Flag::Flags origin) {
+bool CommandLineFlags::doubleAtPut(const char* name, size_t len, double* value, Flag::Flags origin) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_double()) return false;
@@ -751,7 +751,7 @@
   faddr->set_origin(origin);
 }
 
-bool CommandLineFlags::ccstrAt(char* name, size_t len, ccstr* value) {
+bool CommandLineFlags::ccstrAt(const char* name, size_t len, ccstr* value) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_ccstr()) return false;
@@ -759,9 +759,7 @@
   return true;
 }
 
-// Contract:  Flag will make private copy of the incoming value.
-// Outgoing value is always malloc-ed, and caller MUST call free.
-bool CommandLineFlags::ccstrAtPut(char* name, size_t len, ccstr* value, Flag::Flags origin) {
+bool CommandLineFlags::ccstrAtPut(const char* name, size_t len, ccstr* value, Flag::Flags origin) {
   Flag* result = Flag::find_flag(name, len);
   if (result == NULL) return false;
   if (!result->is_ccstr()) return false;
@@ -784,7 +782,6 @@
   return true;
 }
 
-// Contract:  Flag will make private copy of the incoming value.
 void CommandLineFlagsEx::ccstrAtPut(CommandLineFlagWithType flag, ccstr value, Flag::Flags origin) {
   Flag* faddr = address_of_flag(flag);
   guarantee(faddr != NULL && faddr->is_ccstr(), "wrong flag type");
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/globals.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -363,35 +363,37 @@
 
 class CommandLineFlags {
  public:
-  static bool boolAt(char* name, size_t len, bool* value);
-  static bool boolAt(char* name, bool* value)      { return boolAt(name, strlen(name), value); }
-  static bool boolAtPut(char* name, size_t len, bool* value, Flag::Flags origin);
-  static bool boolAtPut(char* name, bool* value, Flag::Flags origin)   { return boolAtPut(name, strlen(name), value, origin); }
+  static bool boolAt(const char* name, size_t len, bool* value);
+  static bool boolAt(const char* name, bool* value)      { return boolAt(name, strlen(name), value); }
+  static bool boolAtPut(const char* name, size_t len, bool* value, Flag::Flags origin);
+  static bool boolAtPut(const char* name, bool* value, Flag::Flags origin)   { return boolAtPut(name, strlen(name), value, origin); }
 
-  static bool intxAt(char* name, size_t len, intx* value);
-  static bool intxAt(char* name, intx* value)      { return intxAt(name, strlen(name), value); }
-  static bool intxAtPut(char* name, size_t len, intx* value, Flag::Flags origin);
-  static bool intxAtPut(char* name, intx* value, Flag::Flags origin)   { return intxAtPut(name, strlen(name), value, origin); }
+  static bool intxAt(const char* name, size_t len, intx* value);
+  static bool intxAt(const char* name, intx* value)      { return intxAt(name, strlen(name), value); }
+  static bool intxAtPut(const char* name, size_t len, intx* value, Flag::Flags origin);
+  static bool intxAtPut(const char* name, intx* value, Flag::Flags origin)   { return intxAtPut(name, strlen(name), value, origin); }
 
-  static bool uintxAt(char* name, size_t len, uintx* value);
-  static bool uintxAt(char* name, uintx* value)    { return uintxAt(name, strlen(name), value); }
-  static bool uintxAtPut(char* name, size_t len, uintx* value, Flag::Flags origin);
-  static bool uintxAtPut(char* name, uintx* value, Flag::Flags origin) { return uintxAtPut(name, strlen(name), value, origin); }
+  static bool uintxAt(const char* name, size_t len, uintx* value);
+  static bool uintxAt(const char* name, uintx* value)    { return uintxAt(name, strlen(name), value); }
+  static bool uintxAtPut(const char* name, size_t len, uintx* value, Flag::Flags origin);
+  static bool uintxAtPut(const char* name, uintx* value, Flag::Flags origin) { return uintxAtPut(name, strlen(name), value, origin); }
 
-  static bool uint64_tAt(char* name, size_t len, uint64_t* value);
-  static bool uint64_tAt(char* name, uint64_t* value) { return uint64_tAt(name, strlen(name), value); }
-  static bool uint64_tAtPut(char* name, size_t len, uint64_t* value, Flag::Flags origin);
-  static bool uint64_tAtPut(char* name, uint64_t* value, Flag::Flags origin) { return uint64_tAtPut(name, strlen(name), value, origin); }
+  static bool uint64_tAt(const char* name, size_t len, uint64_t* value);
+  static bool uint64_tAt(const char* name, uint64_t* value) { return uint64_tAt(name, strlen(name), value); }
+  static bool uint64_tAtPut(const char* name, size_t len, uint64_t* value, Flag::Flags origin);
+  static bool uint64_tAtPut(const char* name, uint64_t* value, Flag::Flags origin) { return uint64_tAtPut(name, strlen(name), value, origin); }
 
-  static bool doubleAt(char* name, size_t len, double* value);
-  static bool doubleAt(char* name, double* value)    { return doubleAt(name, strlen(name), value); }
-  static bool doubleAtPut(char* name, size_t len, double* value, Flag::Flags origin);
-  static bool doubleAtPut(char* name, double* value, Flag::Flags origin) { return doubleAtPut(name, strlen(name), value, origin); }
+  static bool doubleAt(const char* name, size_t len, double* value);
+  static bool doubleAt(const char* name, double* value)    { return doubleAt(name, strlen(name), value); }
+  static bool doubleAtPut(const char* name, size_t len, double* value, Flag::Flags origin);
+  static bool doubleAtPut(const char* name, double* value, Flag::Flags origin) { return doubleAtPut(name, strlen(name), value, origin); }
 
-  static bool ccstrAt(char* name, size_t len, ccstr* value);
-  static bool ccstrAt(char* name, ccstr* value)    { return ccstrAt(name, strlen(name), value); }
-  static bool ccstrAtPut(char* name, size_t len, ccstr* value, Flag::Flags origin);
-  static bool ccstrAtPut(char* name, ccstr* value, Flag::Flags origin) { return ccstrAtPut(name, strlen(name), value, origin); }
+  static bool ccstrAt(const char* name, size_t len, ccstr* value);
+  static bool ccstrAt(const char* name, ccstr* value)    { return ccstrAt(name, strlen(name), value); }
+  // Contract:  Flag will make a private copy of the incoming value.
+  // Outgoing value is always malloc-ed, and the caller MUST call free.
+  static bool ccstrAtPut(const char* name, size_t len, ccstr* value, Flag::Flags origin);
+  static bool ccstrAtPut(const char* name, ccstr* value, Flag::Flags origin) { return ccstrAtPut(name, strlen(name), value, origin); }
 
   // Returns false if name is not a command line flag.
   static bool wasSetOnCmdline(const char* name, bool* value);
@@ -595,6 +597,9 @@
   product(bool, UseAES, false,                                              \
           "Control whether AES instructions can be used on x86/x64")        \
                                                                             \
+  product(bool, UseSHA, false,                                              \
+          "Control whether SHA instructions can be used on SPARC")          \
+                                                                            \
   product(uintx, LargePageSizeInBytes, 0,                                   \
           "Large page size (0 to let VM choose the page size)")             \
                                                                             \
@@ -701,6 +706,15 @@
   product(bool, UseAESIntrinsics, false,                                    \
           "Use intrinsics for AES versions of crypto")                      \
                                                                             \
+  product(bool, UseSHA1Intrinsics, false,                                   \
+          "Use intrinsics for SHA-1 crypto hash function")                  \
+                                                                            \
+  product(bool, UseSHA256Intrinsics, false,                                 \
+          "Use intrinsics for SHA-224 and SHA-256 crypto hash functions")   \
+                                                                            \
+  product(bool, UseSHA512Intrinsics, false,                                 \
+          "Use intrinsics for SHA-384 and SHA-512 crypto hash functions")   \
+                                                                            \
   product(bool, UseCRC32Intrinsics, false,                                  \
           "use intrinsics for java.util.zip.CRC32")                         \
                                                                             \
@@ -1062,6 +1076,9 @@
   product(bool, ClassUnloading, true,                                       \
           "Do unloading of classes")                                        \
                                                                             \
+  product(bool, ClassUnloadingWithConcurrentMark, true,                     \
+          "Do unloading of classes with a concurrent marking cycle")        \
+                                                                            \
   develop(bool, DisableStartThread, false,                                  \
           "Disable starting of additional Java threads "                    \
           "(for debugging only)")                                           \
@@ -2287,6 +2304,9 @@
   manageable(bool, PrintGCTimeStamps, false,                                \
           "Print timestamps at garbage collection")                         \
                                                                             \
+  manageable(bool, PrintGCID, false,                                        \
+          "Print an identifier for each garbage collection")                \
+                                                                            \
   product(bool, PrintGCTaskTimeStamps, false,                               \
           "Print timestamps for individual gc worker thread tasks")         \
                                                                             \
@@ -3865,6 +3885,9 @@
   product(bool, PrintGCCause, true,                                         \
           "Include GC cause in GC logging")                                 \
                                                                             \
+  experimental(intx, SurvivorAlignmentInBytes, 0,                           \
+           "Default survivor space alignment in bytes")                     \
+                                                                            \
   product(bool , AllowNonVirtualCalls, false,                               \
           "Obey the ACC_SUPER flag and allow invokenonvirtual calls")       \
                                                                             \
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/globals_extension.hpp
--- a/src/share/vm/runtime/globals_extension.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/globals_extension.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -201,6 +201,7 @@
   static void uintxAtPut(CommandLineFlagWithType flag, uintx value, Flag::Flags origin);
   static void uint64_tAtPut(CommandLineFlagWithType flag, uint64_t value, Flag::Flags origin);
   static void doubleAtPut(CommandLineFlagWithType flag, double value, Flag::Flags origin);
+  // Contract:  Flag will make a private copy of the incoming value.
   static void ccstrAtPut(CommandLineFlagWithType flag, ccstr value, Flag::Flags origin);
 
   static bool is_default(CommandLineFlag flag);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/interfaceSupport.cpp
--- a/src/share/vm/runtime/interfaceSupport.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/interfaceSupport.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -30,6 +30,7 @@
 #include "memory/resourceArea.hpp"
 #include "runtime/init.hpp"
 #include "runtime/interfaceSupport.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/vframe.hpp"
 #include "utilities/preserveException.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/javaFrameAnchor.hpp
--- a/src/share/vm/runtime/javaFrameAnchor.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/javaFrameAnchor.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,39 +26,7 @@
 #define SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP
 
 #include "utilities/globalDefinitions.hpp"
-#ifdef TARGET_OS_ARCH_linux_x86
-# include "orderAccess_linux_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_sparc
-# include "orderAccess_linux_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_zero
-# include "orderAccess_linux_zero.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_x86
-# include "orderAccess_solaris_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_sparc
-# include "orderAccess_solaris_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_windows_x86
-# include "orderAccess_windows_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_arm
-# include "orderAccess_linux_arm.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_ppc
-# include "orderAccess_linux_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_aix_ppc
-# include "orderAccess_aix_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_x86
-# include "orderAccess_bsd_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_zero
-# include "orderAccess_bsd_zero.inline.hpp"
-#endif
+#include "runtime/orderAccess.inline.hpp"
 
 //
 // An object for encapsulating the machine/os dependent part of a JavaThread frame state
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/mutex.cpp
--- a/src/share/vm/runtime/mutex.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/mutex.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -25,6 +25,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/mutex.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/thread.inline.hpp"
 #include "utilities/events.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/mutexLocker.cpp
--- a/src/share/vm/runtime/mutexLocker.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/mutexLocker.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -69,7 +69,7 @@
 Monitor* SerializePage_lock           = NULL;
 Monitor* Threads_lock                 = NULL;
 Monitor* CGC_lock                     = NULL;
-Mutex*   STS_init_lock                = NULL;
+Monitor* STS_lock                     = NULL;
 Monitor* SLT_lock                     = NULL;
 Monitor* iCMS_lock                    = NULL;
 Monitor* FullGCCount_lock             = NULL;
@@ -173,7 +173,7 @@
   def(tty_lock                     , Mutex  , event,       true ); // allow to lock in VM
 
   def(CGC_lock                   , Monitor, special,     true ); // coordinate between fore- and background GC
-  def(STS_init_lock              , Mutex,   leaf,        true );
+  def(STS_lock                   , Monitor, leaf,        true );
   if (UseConcMarkSweepGC) {
     def(iCMS_lock                  , Monitor, special,     true ); // CMS incremental mode start/stop notification
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/mutexLocker.hpp
--- a/src/share/vm/runtime/mutexLocker.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/mutexLocker.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -79,7 +79,7 @@
                                                  // (also used by Safepoints too to block threads creation/destruction)
 extern Monitor* CGC_lock;                        // used for coordination between
                                                  // fore- & background GC threads.
-extern Mutex*   STS_init_lock;                   // coordinate initialization of SuspendibleThreadSets.
+extern Monitor* STS_lock;                        // used for joining/leaving SuspendibleThreadSet.
 extern Monitor* SLT_lock;                        // used in CMS GC for acquiring PLL
 extern Monitor* iCMS_lock;                       // CMS incremental mode start/stop notification
 extern Monitor* FullGCCount_lock;                // in support of "concurrent" full gc
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/objectMonitor.cpp
--- a/src/share/vm/runtime/objectMonitor.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/objectMonitor.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,6 +32,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
 #include "runtime/objectMonitor.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.inline.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/orderAccess.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/orderAccess.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP
+#define SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP
+
+#include "runtime/orderAccess.hpp"
+
+// Linux
+#ifdef TARGET_OS_ARCH_linux_x86
+# include "orderAccess_linux_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_sparc
+# include "orderAccess_linux_sparc.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_zero
+# include "orderAccess_linux_zero.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_arm
+# include "orderAccess_linux_arm.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_ppc
+# include "orderAccess_linux_ppc.inline.hpp"
+#endif
+
+// Solaris
+#ifdef TARGET_OS_ARCH_solaris_x86
+# include "orderAccess_solaris_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_solaris_sparc
+# include "orderAccess_solaris_sparc.inline.hpp"
+#endif
+
+// Windows
+#ifdef TARGET_OS_ARCH_windows_x86
+# include "orderAccess_windows_x86.inline.hpp"
+#endif
+
+// AIX
+#ifdef TARGET_OS_ARCH_aix_ppc
+# include "orderAccess_aix_ppc.inline.hpp"
+#endif
+
+// BSD
+#ifdef TARGET_OS_ARCH_bsd_x86
+# include "orderAccess_bsd_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_bsd_zero
+# include "orderAccess_bsd_zero.inline.hpp"
+#endif
+
+#endif // SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/os.cpp
--- a/src/share/vm/runtime/os.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/os.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -32,6 +32,9 @@
 #include "gc_implementation/shared/vmGCOperations.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/allocation.inline.hpp"
+#ifdef ASSERT
+#include "memory/guardedMemory.hpp"
+#endif
 #include "oops/oop.inline.hpp"
 #include "prims/jvm.h"
 #include "prims/jvm_misc.hpp"
@@ -524,118 +527,16 @@
 
 
 
-#ifdef ASSERT
-#define space_before             (MallocCushion + sizeof(double))
-#define space_after              MallocCushion
-#define size_addr_from_base(p)   (size_t*)(p + space_before - sizeof(size_t))
-#define size_addr_from_obj(p)    ((size_t*)p - 1)
-// MallocCushion: size of extra cushion allocated around objects with +UseMallocOnly
-// NB: cannot be debug variable, because these aren't set from the command line until
-// *after* the first few allocs already happened
-#define MallocCushion            16
-#else
-#define space_before             0
-#define space_after              0
-#define size_addr_from_base(p)   should not use w/o ASSERT
-#define size_addr_from_obj(p)    should not use w/o ASSERT
-#define MallocCushion            0
-#endif
 #define paranoid                 0  /* only set to 1 if you suspect checking code has bug */
 
 #ifdef ASSERT
-inline size_t get_size(void* obj) {
-  size_t size = *size_addr_from_obj(obj);
-  if (size < 0) {
-    fatal(err_msg("free: size field of object #" PTR_FORMAT " was overwritten ("
-                  SIZE_FORMAT ")", obj, size));
-  }
-  return size;
-}
-
-u_char* find_cushion_backwards(u_char* start) {
-  u_char* p = start;
-  while (p[ 0] != badResourceValue || p[-1] != badResourceValue ||
-         p[-2] != badResourceValue || p[-3] != badResourceValue) p--;
-  // ok, we have four consecutive marker bytes; find start
-  u_char* q = p - 4;
-  while (*q == badResourceValue) q--;
-  return q + 1;
-}
-
-u_char* find_cushion_forwards(u_char* start) {
-  u_char* p = start;
-  while (p[0] != badResourceValue || p[1] != badResourceValue ||
-         p[2] != badResourceValue || p[3] != badResourceValue) p++;
-  // ok, we have four consecutive marker bytes; find end of cushion
-  u_char* q = p + 4;
-  while (*q == badResourceValue) q++;
-  return q - MallocCushion;
-}
-
-void print_neighbor_blocks(void* ptr) {
-  // find block allocated before ptr (not entirely crash-proof)
-  if (MallocCushion < 4) {
-    tty->print_cr("### cannot find previous block (MallocCushion < 4)");
-    return;
-  }
-  u_char* start_of_this_block = (u_char*)ptr - space_before;
-  u_char* end_of_prev_block_data = start_of_this_block - space_after -1;
-  // look for cushion in front of prev. block
-  u_char* start_of_prev_block = find_cushion_backwards(end_of_prev_block_data);
-  ptrdiff_t size = *size_addr_from_base(start_of_prev_block);
-  u_char* obj = start_of_prev_block + space_before;
-  if (size <= 0 ) {
-    // start is bad; mayhave been confused by OS data inbetween objects
-    // search one more backwards
-    start_of_prev_block = find_cushion_backwards(start_of_prev_block);
-    size = *size_addr_from_base(start_of_prev_block);
-    obj = start_of_prev_block + space_before;
-  }
-
-  if (start_of_prev_block + space_before + size + space_after == start_of_this_block) {
-    tty->print_cr("### previous object: " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", obj, size);
-  } else {
-    tty->print_cr("### previous object (not sure if correct): " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", obj, size);
-  }
-
-  // now find successor block
-  u_char* start_of_next_block = (u_char*)ptr + *size_addr_from_obj(ptr) + space_after;
-  start_of_next_block = find_cushion_forwards(start_of_next_block);
-  u_char* next_obj = start_of_next_block + space_before;
-  ptrdiff_t next_size = *size_addr_from_base(start_of_next_block);
-  if (start_of_next_block[0] == badResourceValue &&
-      start_of_next_block[1] == badResourceValue &&
-      start_of_next_block[2] == badResourceValue &&
-      start_of_next_block[3] == badResourceValue) {
-    tty->print_cr("### next object: " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", next_obj, next_size);
-  } else {
-    tty->print_cr("### next object (not sure if correct): " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", next_obj, next_size);
-  }
-}
-
-
-void report_heap_error(void* memblock, void* bad, const char* where) {
-  tty->print_cr("## nof_mallocs = " UINT64_FORMAT ", nof_frees = " UINT64_FORMAT, os::num_mallocs, os::num_frees);
-  tty->print_cr("## memory stomp: byte at " PTR_FORMAT " %s object " PTR_FORMAT, bad, where, memblock);
-  print_neighbor_blocks(memblock);
-  fatal("memory stomping error");
-}
-
-void verify_block(void* memblock) {
-  size_t size = get_size(memblock);
-  if (MallocCushion) {
-    u_char* ptr = (u_char*)memblock - space_before;
-    for (int i = 0; i < MallocCushion; i++) {
-      if (ptr[i] != badResourceValue) {
-        report_heap_error(memblock, ptr+i, "in front of");
-      }
-    }
-    u_char* end = (u_char*)memblock + size + space_after;
-    for (int j = -MallocCushion; j < 0; j++) {
-      if (end[j] != badResourceValue) {
-        report_heap_error(memblock, end+j, "after");
-      }
-    }
+static void verify_memory(void* ptr) {
+  GuardedMemory guarded(ptr);
+  if (!guarded.verify_guards()) {
+    tty->print_cr("## nof_mallocs = " UINT64_FORMAT ", nof_frees = " UINT64_FORMAT, os::num_mallocs, os::num_frees);
+    tty->print_cr("## memory stomp:");
+    guarded.print_on(tty);
+    fatal("memory stomping error");
   }
 }
 #endif
@@ -686,16 +587,18 @@
     size = 1;
   }
 
-  const size_t alloc_size = size + space_before + space_after;
-
+#ifndef ASSERT
+  const size_t alloc_size = size;
+#else
+  const size_t alloc_size = GuardedMemory::get_total_size(size);
   if (size > alloc_size) { // Check for rollover.
     return NULL;
   }
+#endif
 
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
 
   u_char* ptr;
-
   if (MallocMaxTestWords > 0) {
     ptr = testMalloc(alloc_size);
   } else {
@@ -703,28 +606,26 @@
   }
 
 #ifdef ASSERT
-  if (ptr == NULL) return NULL;
-  if (MallocCushion) {
-    for (u_char* p = ptr; p < ptr + MallocCushion; p++) *p = (u_char)badResourceValue;
-    u_char* end = ptr + space_before + size;
-    for (u_char* pq = ptr+MallocCushion; pq < end; pq++) *pq = (u_char)uninitBlockPad;
-    for (u_char* q = end; q < end + MallocCushion; q++) *q = (u_char)badResourceValue;
+  if (ptr == NULL) {
+    return NULL;
   }
-  // put size just before data
-  *size_addr_from_base(ptr) = size;
+  // Wrap memory with guard
+  GuardedMemory guarded(ptr, size);
+  ptr = guarded.get_user_ptr();
 #endif
-  u_char* memblock = ptr + space_before;
-  if ((intptr_t)memblock == (intptr_t)MallocCatchPtr) {
-    tty->print_cr("os::malloc caught, " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock);
+  if ((intptr_t)ptr == (intptr_t)MallocCatchPtr) {
+    tty->print_cr("os::malloc caught, " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, ptr);
     breakpoint();
   }
-  debug_only(if (paranoid) verify_block(memblock));
-  if (PrintMalloc && tty != NULL) tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock);
+  debug_only(if (paranoid) verify_memory(ptr));
+  if (PrintMalloc && tty != NULL) {
+    tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, ptr);
+  }
 
-  // we do not track MallocCushion memory
-    MemTracker::record_malloc((address)memblock, size, memflags, caller == 0 ? CALLER_PC : caller);
+  // we do not track guard memory
+  MemTracker::record_malloc((address)ptr, size, memflags, caller == 0 ? CALLER_PC : caller);
 
-  return memblock;
+  return ptr;
 }
 
 
@@ -743,27 +644,32 @@
   return ptr;
 #else
   if (memblock == NULL) {
-    return malloc(size, memflags, (caller == 0 ? CALLER_PC : caller));
+    return os::malloc(size, memflags, (caller == 0 ? CALLER_PC : caller));
   }
   if ((intptr_t)memblock == (intptr_t)MallocCatchPtr) {
     tty->print_cr("os::realloc caught " PTR_FORMAT, memblock);
     breakpoint();
   }
-  verify_block(memblock);
+  verify_memory(memblock);
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
-  if (size == 0) return NULL;
+  if (size == 0) {
+    return NULL;
+  }
   // always move the block
-  void* ptr = malloc(size, memflags, caller == 0 ? CALLER_PC : caller);
-  if (PrintMalloc) tty->print_cr("os::remalloc " SIZE_FORMAT " bytes, " PTR_FORMAT " --> " PTR_FORMAT, size, memblock, ptr);
+  void* ptr = os::malloc(size, memflags, caller == 0 ? CALLER_PC : caller);
+  if (PrintMalloc) {
+    tty->print_cr("os::remalloc " SIZE_FORMAT " bytes, " PTR_FORMAT " --> " PTR_FORMAT, size, memblock, ptr);
+  }
   // Copy to new memory if malloc didn't fail
   if ( ptr != NULL ) {
-    memcpy(ptr, memblock, MIN2(size, get_size(memblock)));
-    if (paranoid) verify_block(ptr);
+    GuardedMemory guarded(memblock);
+    memcpy(ptr, memblock, MIN2(size, guarded.get_user_size()));
+    if (paranoid) verify_memory(ptr);
     if ((intptr_t)ptr == (intptr_t)MallocCatchPtr) {
       tty->print_cr("os::realloc caught, " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, ptr);
       breakpoint();
     }
-    free(memblock);
+    os::free(memblock);
   }
   return ptr;
 #endif
@@ -771,6 +677,7 @@
 
 
 void  os::free(void *memblock, MEMFLAGS memflags) {
+  address trackp = (address) memblock;
   NOT_PRODUCT(inc_stat_counter(&num_frees, 1));
 #ifdef ASSERT
   if (memblock == NULL) return;
@@ -778,34 +685,20 @@
     if (tty != NULL) tty->print_cr("os::free caught " PTR_FORMAT, memblock);
     breakpoint();
   }
-  verify_block(memblock);
+  verify_memory(memblock);
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
-  // Added by detlefs.
-  if (MallocCushion) {
-    u_char* ptr = (u_char*)memblock - space_before;
-    for (u_char* p = ptr; p < ptr + MallocCushion; p++) {
-      guarantee(*p == badResourceValue,
-                "Thing freed should be malloc result.");
-      *p = (u_char)freeBlockPad;
-    }
-    size_t size = get_size(memblock);
-    inc_stat_counter(&free_bytes, size);
-    u_char* end = ptr + space_before + size;
-    for (u_char* q = end; q < end + MallocCushion; q++) {
-      guarantee(*q == badResourceValue,
-                "Thing freed should be malloc result.");
-      *q = (u_char)freeBlockPad;
-    }
-    if (PrintMalloc && tty != NULL)
+
+  GuardedMemory guarded(memblock);
+  size_t size = guarded.get_user_size();
+  inc_stat_counter(&free_bytes, size);
+  memblock = guarded.release_for_freeing();
+  if (PrintMalloc && tty != NULL) {
       fprintf(stderr, "os::free " SIZE_FORMAT " bytes --> " PTR_FORMAT "\n", size, (uintptr_t)memblock);
-  } else if (PrintMalloc && tty != NULL) {
-    // tty->print_cr("os::free %p", memblock);
-    fprintf(stderr, "os::free " PTR_FORMAT "\n", (uintptr_t)memblock);
   }
 #endif
-  MemTracker::record_free((address)memblock, memflags);
+  MemTracker::record_free(trackp, memflags);
 
-  ::free((char*)memblock - space_before);
+  ::free(memblock);
 }
 
 void os::init_random(long initval) {
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/perfMemory.cpp
--- a/src/share/vm/runtime/perfMemory.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/perfMemory.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "runtime/java.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/perfData.hpp"
 #include "runtime/perfMemory.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/prefetch.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/prefetch.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_PREFETCH_INLINE_HPP
+#define SHARE_VM_RUNTIME_PREFETCH_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+// Linux
+#ifdef TARGET_OS_ARCH_linux_x86
+# include "prefetch_linux_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_sparc
+# include "prefetch_linux_sparc.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_zero
+# include "prefetch_linux_zero.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_arm
+# include "prefetch_linux_arm.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_linux_ppc
+# include "prefetch_linux_ppc.inline.hpp"
+#endif
+
+// Solaris
+#ifdef TARGET_OS_ARCH_solaris_x86
+# include "prefetch_solaris_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_solaris_sparc
+# include "prefetch_solaris_sparc.inline.hpp"
+#endif
+
+// Windows
+#ifdef TARGET_OS_ARCH_windows_x86
+# include "prefetch_windows_x86.inline.hpp"
+#endif
+
+// AIX
+#ifdef TARGET_OS_ARCH_aix_ppc
+# include "prefetch_aix_ppc.inline.hpp"
+#endif
+
+// BSD
+#ifdef TARGET_OS_ARCH_bsd_x86
+# include "prefetch_bsd_x86.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_bsd_zero
+# include "prefetch_bsd_zero.inline.hpp"
+#endif
+
+#endif // SHARE_VM_RUNTIME_PREFETCH_INLINE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/safepoint.cpp
--- a/src/share/vm/runtime/safepoint.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/safepoint.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -41,6 +41,7 @@
 #include "runtime/frame.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/signature.hpp"
@@ -75,7 +76,7 @@
 #endif
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp"
-#include "gc_implementation/shared/concurrentGCThread.hpp"
+#include "gc_implementation/shared/suspendibleThreadSet.hpp"
 #endif // INCLUDE_ALL_GCS
 #ifdef COMPILER1
 #include "c1/c1_globals.hpp"
@@ -112,7 +113,7 @@
     // more-general mechanism below.  DLD (01/05).
     ConcurrentMarkSweepThread::synchronize(false);
   } else if (UseG1GC) {
-    ConcurrentGCThread::safepoint_synchronize();
+    SuspendibleThreadSet::synchronize();
   }
 #endif // INCLUDE_ALL_GCS
 
@@ -488,7 +489,7 @@
   if (UseConcMarkSweepGC) {
     ConcurrentMarkSweepThread::desynchronize(false);
   } else if (UseG1GC) {
-    ConcurrentGCThread::safepoint_desynchronize();
+    SuspendibleThreadSet::desynchronize();
   }
 #endif // INCLUDE_ALL_GCS
   // record this time so VMThread can keep track how much time has elasped
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/sharedRuntimeMath.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/sharedRuntimeMath.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_SHAREDRUNTIMEMATH_HPP
+#define SHARE_VM_RUNTIME_SHAREDRUNTIMEMATH_HPP
+
+#include <math.h>
+
+// Used to access the lower/higher 32 bits of a double: store through 'd', then read or patch 'split.lo' / 'split.hi'.
+typedef union {
+    double d;  // the value viewed as a double
+    struct {
+#ifdef VM_LITTLE_ENDIAN
+      int lo;  // little-endian layout: low word comes first
+      int hi;
+#else
+      int hi;  // otherwise: high word comes first
+      int lo;
+#endif
+    } split;  // the same storage viewed as two ints (assumes 32-bit int and 64-bit double)
+} DoubleIntConv;
+
+static inline int high(double d) {  // Returns the high 32-bit word of d (the word holding sign and exponent) as an int.
+  DoubleIntConv x;
+  x.d = d;  // store as a double ...
+  return x.split.hi;  // ... and read the high word back through the int view
+}
+
+static inline int low(double d) {  // Returns the low 32-bit word of d as an int.
+  DoubleIntConv x;
+  x.d = d;  // store as a double ...
+  return x.split.lo;  // ... and read the low word back through the int view
+}
+
+static inline void set_high(double* d, int high) {  // Overwrites the high word of *d; the parameter 'high' hides the function high() in here.
+  DoubleIntConv conv;
+  conv.d = *d;  // start from the current value so the low word is preserved
+  conv.split.hi = high;
+  *d = conv.d;  // write the patched value back to the caller's double
+}
+
+static inline void set_low(double* d, int low) {  // Overwrites the low word of *d; the parameter 'low' hides the function low() in here.
+  DoubleIntConv conv;
+  conv.d = *d;  // start from the current value so the high word is preserved
+  conv.split.lo = low;
+  *d = conv.d;  // write the patched value back to the caller's double
+}
+
+static double copysignA(double x, double y) {  // Returns x with its sign bit replaced by the sign bit of y.
+  DoubleIntConv convX;
+  convX.d = x;
+  convX.split.hi = (convX.split.hi & 0x7fffffff) | (high(y) & 0x80000000);  // clear bit 31 of x's high word, then OR in bit 31 of y's
+  return convX.d;
+}
+
+/*
+ * ====================================================
+ * Copyright (c) 1998 Oracle and/or its affiliates. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/*
+ * scalbn (double x, int n)
+ * scalbn(x,n) returns x* 2**n  computed by  exponent
+ * manipulation rather than by actually performing an
+ * exponentiation or a multiplication.
+ */
+
+static const double  // constants used by scalbnA() below
+two54   =  1.80143985094819840000e+16, /* 2**54:  0x43500000, 0x00000000 */
+twom54  =  5.55111512312578270212e-17, /* 2**-54: 0x3C900000, 0x00000000 */
+hugeX  = 1.0e+300, /* multiplied by a signed copy of itself to produce the overflow result */
+tiny   = 1.0e-300; /* multiplied by x or by a signed copy of itself to produce the underflow result */
+
+static double scalbnA(double x, int n) {  // Returns x * 2**n by adjusting the exponent field of x directly.
+  int  k,hx,lx;  // k: biased exponent; hx/lx: high and low words of x
+  hx = high(x);
+  lx = low(x);
+  k = (hx&0x7ff00000)>>20;              /* extract exponent */
+  if (k==0) {                           /* 0 or subnormal x */
+    if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
+    x *= two54;  // scale the subnormal up by 2**54 so it has a usable exponent field
+    hx = high(x);  // re-read the high word of the scaled value
+    k = ((hx&0x7ff00000)>>20) - 54;  // exponent with the 2**54 scaling taken back out
+    if (n< -50000) return tiny*x;       /*underflow*/
+  }
+  if (k==0x7ff) return x+x;             /* NaN or Inf */
+  k = k+n;  // exponent field the result would need
+  if (k > 0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
+  if (k > 0) {                          /* normal result */
+    set_high(&x, (hx&0x800fffff)|(k<<20));  // keep bit 31 and the low 20 bits of the high word, install the new exponent
+    return x;
+  }
+  if (k <= -54) {  // at or below -54: handled as overflow/underflow here instead of building a subnormal
+    if (n > 50000)      /* in case integer overflow in n+k */
+      return hugeX*copysignA(hugeX,x);  /*overflow*/
+    else return tiny*copysignA(tiny,x); /*underflow*/
+  }
+  k += 54;                              /* subnormal result */
+  set_high(&x, (hx&0x800fffff)|(k<<20));  // same bit surgery as above, using the exponent raised by 54
+  return x*twom54;  // multiply by 2**-54 to undo the +54 and land in the subnormal range
+}
+
+#endif // SHARE_VM_RUNTIME_SHAREDRUNTIMEMATH_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/sharedRuntimeTrans.cpp
--- a/src/share/vm/runtime/sharedRuntimeTrans.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/sharedRuntimeTrans.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,81 +40,11 @@
 // generated; can not figure out how to turn down optimization for one
 // file in the IDE on Windows
 #ifdef WIN32
+# pragma warning( disable: 4748 ) // /GS can not protect parameters and local variables from local buffer overrun because optimizations are disabled in function
 # pragma optimize ( "", off )
 #endif
 
-#include <math.h>
-
-// VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
-// [jk] this is not 100% correct because the float word order may different
-// from the byte order (e.g. on ARM)
-#ifdef VM_LITTLE_ENDIAN
-# define __HI(x) *(1+(int*)&x)
-# define __LO(x) *(int*)&x
-#else
-# define __HI(x) *(int*)&x
-# define __LO(x) *(1+(int*)&x)
-#endif
-
-#if !defined(AIX)
-double copysign(double x, double y) {
-  __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
-  return x;
-}
-#endif
-
-/*
- * ====================================================
- * Copyright (c) 1998 Oracle and/or its affiliates. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-/*
- * scalbn (double x, int n)
- * scalbn(x,n) returns x* 2**n  computed by  exponent
- * manipulation rather than by actually performing an
- * exponentiation or a multiplication.
- */
-
-static const double
-two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-  twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-  hugeX   = 1.0e+300,
-  tiny   = 1.0e-300;
-
-#if !defined(AIX)
-double scalbn (double x, int n) {
-  int  k,hx,lx;
-  hx = __HI(x);
-  lx = __LO(x);
-  k = (hx&0x7ff00000)>>20;              /* extract exponent */
-  if (k==0) {                           /* 0 or subnormal x */
-    if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
-    x *= two54;
-    hx = __HI(x);
-    k = ((hx&0x7ff00000)>>20) - 54;
-    if (n< -50000) return tiny*x;       /*underflow*/
-  }
-  if (k==0x7ff) return x+x;             /* NaN or Inf */
-  k = k+n;
-  if (k >  0x7fe) return hugeX*copysign(hugeX,x); /* overflow  */
-  if (k > 0)                            /* normal result */
-    {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
-  if (k <= -54) {
-    if (n > 50000)      /* in case integer overflow in n+k */
-      return hugeX*copysign(hugeX,x);   /*overflow*/
-    else return tiny*copysign(tiny,x);  /*underflow*/
-  }
-  k += 54;                              /* subnormal result */
-  __HI(x) = (hx&0x800fffff)|(k<<20);
-  return x*twom54;
-}
-#endif
+#include "runtime/sharedRuntimeMath.hpp"
 
 /* __ieee754_log(x)
  * Return the logrithm of x
@@ -185,8 +115,8 @@
   int k,hx,i,j;
   unsigned lx;
 
-  hx = __HI(x);               /* high word of x */
-  lx = __LO(x);               /* low  word of x */
+  hx = high(x);               /* high word of x */
+  lx = low(x);                /* low  word of x */
 
   k=0;
   if (hx < 0x00100000) {                   /* x < 2**-1022  */
@@ -194,13 +124,13 @@
       return -two54/zero;             /* log(+-0)=-inf */
     if (hx<0) return (x-x)/zero;   /* log(-#) = NaN */
     k -= 54; x *= two54; /* subnormal number, scale up x */
-    hx = __HI(x);             /* high word of x */
+    hx = high(x);             /* high word of x */
   }
   if (hx >= 0x7ff00000) return x+x;
   k += (hx>>20)-1023;
   hx &= 0x000fffff;
   i = (hx+0x95f64)&0x100000;
-  __HI(x) = hx|(i^0x3ff00000);        /* normalize x or x/2 */
+  set_high(&x, hx|(i^0x3ff00000)); /* normalize x or x/2 */
   k += (i>>20);
   f = x-1.0;
   if((0x000fffff&(2+hx))<3) {  /* |f| < 2**-20 */
@@ -279,8 +209,8 @@
   int i,k,hx;
   unsigned lx;
 
-  hx = __HI(x);       /* high word of x */
-  lx = __LO(x);       /* low word of x */
+  hx = high(x);       /* high word of x */
+  lx = low(x);        /* low word of x */
 
   k=0;
   if (hx < 0x00100000) {                  /* x < 2**-1022  */
@@ -288,14 +218,14 @@
       return -two54/zero;             /* log(+-0)=-inf */
     if (hx<0) return (x-x)/zero;        /* log(-#) = NaN */
     k -= 54; x *= two54; /* subnormal number, scale up x */
-    hx = __HI(x);                /* high word of x */
+    hx = high(x);                /* high word of x */
   }
   if (hx >= 0x7ff00000) return x+x;
   k += (hx>>20)-1023;
   i  = ((unsigned)k&0x80000000)>>31;
   hx = (hx&0x000fffff)|((0x3ff-i)<<20);
   y  = (double)(k+i);
-  __HI(x) = hx;
+  set_high(&x, hx);
   z  = y*log10_2lo + ivln10*__ieee754_log(x);
   return  z+y*log10_2hi;
 }
@@ -390,14 +320,14 @@
   int k=0,xsb;
   unsigned hx;
 
-  hx  = __HI(x);        /* high word of x */
+  hx  = high(x);                /* high word of x */
   xsb = (hx>>31)&1;             /* sign bit of x */
   hx &= 0x7fffffff;             /* high word of |x| */
 
   /* filter out non-finite argument */
   if(hx >= 0x40862E42) {                        /* if |x|>=709.78... */
     if(hx>=0x7ff00000) {
-      if(((hx&0xfffff)|__LO(x))!=0)
+      if(((hx&0xfffff)|low(x))!=0)
         return x+x;             /* NaN */
       else return (xsb==0)? x:0.0;      /* exp(+-inf)={inf,0} */
     }
@@ -428,10 +358,10 @@
   if(k==0)      return one-((x*c)/(c-2.0)-x);
   else          y = one-((lo-(x*c)/(2.0-c))-hi);
   if(k >= -1021) {
-    __HI(y) += (k<<20); /* add k to y's exponent */
+    set_high(&y, high(y) + (k<<20)); /* add k to y's exponent */
     return y;
   } else {
-    __HI(y) += ((k+1000)<<20);/* add k to y's exponent */
+    set_high(&y, high(y) + ((k+1000)<<20)); /* add k to y's exponent */
     return y*twom1000;
   }
 }
@@ -518,8 +448,8 @@
   unsigned lx,ly;
 
   i0 = ((*(int*)&one)>>29)^1; i1=1-i0;
-  hx = __HI(x); lx = __LO(x);
-  hy = __HI(y); ly = __LO(y);
+  hx = high(x); lx = low(x);
+  hy = high(y); ly = low(y);
   ix = hx&0x7fffffff;  iy = hy&0x7fffffff;
 
   /* y==zero: x**0 = 1 */
@@ -619,14 +549,14 @@
     u = ivln2_h*t;      /* ivln2_h has 21 sig. bits */
     v = t*ivln2_l-w*ivln2;
     t1 = u+v;
-    __LO(t1) = 0;
+    set_low(&t1, 0);
     t2 = v-(t1-u);
   } else {
     double ss,s2,s_h,s_l,t_h,t_l;
     n = 0;
     /* take care subnormal number */
     if(ix<0x00100000)
-      {ax *= two53; n -= 53; ix = __HI(ax); }
+      {ax *= two53; n -= 53; ix = high(ax); }
     n  += ((ix)>>20)-0x3ff;
     j  = ix&0x000fffff;
     /* determine interval */
@@ -634,17 +564,17 @@
     if(j<=0x3988E) k=0;         /* |x|<sqrt(3/2) */
     else if(j<0xBB67A) k=1;     /* |x|<sqrt(3)   */
     else {k=0;n+=1;ix -= 0x00100000;}
-    __HI(ax) = ix;
+    set_high(&ax, ix);
 
     /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
     u = ax-bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
     v = one/(ax+bp[k]);
     ss = u*v;
     s_h = ss;
-    __LO(s_h) = 0;
+    set_low(&s_h, 0);
     /* t_h=ax+bp[k] High */
     t_h = zeroX;
-    __HI(t_h)=((ix>>1)|0x20000000)+0x00080000+(k<<18);
+    set_high(&t_h, ((ix>>1)|0x20000000)+0x00080000+(k<<18));
     t_l = ax - (t_h-bp[k]);
     s_l = v*((u-s_h*t_h)-s_h*t_l);
     /* compute log(ax) */
@@ -653,32 +583,32 @@
     r += s_l*(s_h+ss);
     s2  = s_h*s_h;
     t_h = 3.0+s2+r;
-    __LO(t_h) = 0;
+    set_low(&t_h, 0);
     t_l = r-((t_h-3.0)-s2);
     /* u+v = ss*(1+...) */
     u = s_h*t_h;
     v = s_l*t_h+t_l*ss;
     /* 2/(3log2)*(ss+...) */
     p_h = u+v;
-    __LO(p_h) = 0;
+    set_low(&p_h, 0);
     p_l = v-(p_h-u);
     z_h = cp_h*p_h;             /* cp_h+cp_l = 2/(3*log2) */
     z_l = cp_l*p_h+p_l*cp+dp_l[k];
     /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
     t = (double)n;
     t1 = (((z_h+z_l)+dp_h[k])+t);
-    __LO(t1) = 0;
+    set_low(&t1, 0);
     t2 = z_l-(((t1-t)-dp_h[k])-z_h);
   }
 
   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
   y1  = y;
-  __LO(y1) = 0;
+  set_low(&y1, 0);
   p_l = (y-y1)*t1+y*t2;
   p_h = y1*t1;
   z = p_l+p_h;
-  j = __HI(z);
-  i = __LO(z);
+  j = high(z);
+  i = low(z);
   if (j>=0x40900000) {                          /* z >= 1024 */
     if(((j-0x40900000)|i)!=0)                   /* if z > 1024 */
       return s*hugeX*hugeX;                     /* overflow */
@@ -702,13 +632,13 @@
     n = j+(0x00100000>>(k+1));
     k = ((n&0x7fffffff)>>20)-0x3ff;     /* new k for n */
     t = zeroX;
-    __HI(t) = (n&~(0x000fffff>>k));
+    set_high(&t, (n&~(0x000fffff>>k)));
     n = ((n&0x000fffff)|0x00100000)>>(20-k);
     if(j<0) n = -n;
     p_h -= t;
   }
   t = p_l+p_h;
-  __LO(t) = 0;
+  set_low(&t, 0);
   u = t*lg2_h;
   v = (p_l-(t-p_h))*lg2+t*lg2_l;
   z = u+v;
@@ -717,10 +647,10 @@
   t1  = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
   r  = (z*t1)/(t1-two)-(w+z*w);
   z  = one-(r-z);
-  j  = __HI(z);
+  j  = high(z);
   j += (n<<20);
-  if((j>>20)<=0) z = scalbn(z,n);       /* subnormal output */
-  else __HI(z) += (n<<20);
+  if((j>>20)<=0) z = scalbnA(z,n);       /* subnormal output */
+  else set_high(&z, high(z) + (n<<20));
   return s*z;
 }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/sharedRuntimeTrig.cpp
--- a/src/share/vm/runtime/sharedRuntimeTrig.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/sharedRuntimeTrig.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -63,63 +63,7 @@
 #define SAFEBUF
 #endif
 
-#include <math.h>
-
-// VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
-// [jk] this is not 100% correct because the float word order may different
-// from the byte order (e.g. on ARM)
-#ifdef VM_LITTLE_ENDIAN
-# define __HI(x) *(1+(int*)&x)
-# define __LO(x) *(int*)&x
-#else
-# define __HI(x) *(int*)&x
-# define __LO(x) *(1+(int*)&x)
-#endif
-
-static double copysignA(double x, double y) {
-  __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
-  return x;
-}
-
-/*
- * scalbn (double x, int n)
- * scalbn(x,n) returns x* 2**n  computed by  exponent
- * manipulation rather than by actually performing an
- * exponentiation or a multiplication.
- */
-
-static const double
-two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-hugeX  = 1.0e+300,
-tiny   = 1.0e-300;
-
-static double scalbnA (double x, int n) {
-  int  k,hx,lx;
-  hx = __HI(x);
-  lx = __LO(x);
-  k = (hx&0x7ff00000)>>20;              /* extract exponent */
-  if (k==0) {                           /* 0 or subnormal x */
-    if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
-    x *= two54;
-    hx = __HI(x);
-    k = ((hx&0x7ff00000)>>20) - 54;
-    if (n< -50000) return tiny*x;       /*underflow*/
-  }
-  if (k==0x7ff) return x+x;             /* NaN or Inf */
-  k = k+n;
-  if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
-  if (k > 0)                            /* normal result */
-    {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
-  if (k <= -54) {
-    if (n > 50000)      /* in case integer overflow in n+k */
-      return hugeX*copysignA(hugeX,x);  /*overflow*/
-    else return tiny*copysignA(tiny,x); /*underflow*/
-  }
-  k += 54;                              /* subnormal result */
-  __HI(x) = (hx&0x800fffff)|(k<<20);
-  return x*twom54;
-}
+#include "runtime/sharedRuntimeMath.hpp"
 
 /*
  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
@@ -603,7 +547,7 @@
 {
         double z,r,v;
         int ix;
-        ix = __HI(x)&0x7fffffff;        /* high word of x */
+        ix = high(x)&0x7fffffff;                /* high word of x */
         if(ix<0x3e400000)                       /* |x| < 2**-27 */
            {if((int)x==0) return x;}            /* generate inexact */
         z       =  x*x;
@@ -658,9 +602,9 @@
 
 static double __kernel_cos(double x, double y)
 {
-  double a,h,z,r,qx;
+  double a,h,z,r,qx=0;
   int ix;
-  ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/
+  ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/
   if(ix<0x3e400000) {                   /* if x < 2**27 */
     if(((int)x)==0) return one;         /* generate inexact */
   }
@@ -672,8 +616,8 @@
     if(ix > 0x3fe90000) {               /* x > 0.78125 */
       qx = 0.28125;
     } else {
-      __HI(qx) = ix-0x00200000; /* x/4 */
-      __LO(qx) = 0;
+      set_high(&qx, ix-0x00200000); /* x/4 */
+      set_low(&qx, 0);
     }
     h = 0.5*z-qx;
     a = one-qx;
@@ -738,11 +682,11 @@
 {
   double z,r,v,w,s;
   int ix,hx;
-  hx = __HI(x);   /* high word of x */
+  hx = high(x);           /* high word of x */
   ix = hx&0x7fffffff;     /* high word of |x| */
   if(ix<0x3e300000) {                     /* x < 2**-28 */
     if((int)x==0) {                       /* generate inexact */
-      if (((ix | __LO(x)) | (iy + 1)) == 0)
+      if (((ix | low(x)) | (iy + 1)) == 0)
         return one / fabsd(x);
       else {
         if (iy == 1)
@@ -751,10 +695,10 @@
           double a, t;
 
           z = w = x + y;
-          __LO(z) = 0;
+          set_low(&z, 0);
           v = y - (z - x);
           t = a = -one / w;
-          __LO(t) = 0;
+          set_low(&t, 0);
           s = one + t * z;
           return t + a * (s + t * v);
         }
@@ -789,10 +733,10 @@
     /*  compute -1.0/(x+r) accurately */
     double a,t;
     z  = w;
-    __LO(z) = 0;
+    set_low(&z, 0);
     v  = r-(z - x);     /* z+v = r+x */
     t = a  = -1.0/w;    /* a = -1.0/w */
-    __LO(t) = 0;
+    set_low(&t, 0);
     s  = 1.0+t*z;
     return t+a*(s+t*v);
   }
@@ -841,7 +785,7 @@
   int n, ix;
 
   /* High word of x. */
-  ix = __HI(x);
+  ix = high(x);
 
   /* |x| ~< pi/4 */
   ix &= 0x7fffffff;
@@ -899,7 +843,7 @@
   int n, ix;
 
   /* High word of x. */
-  ix = __HI(x);
+  ix = high(x);
 
   /* |x| ~< pi/4 */
   ix &= 0x7fffffff;
@@ -956,7 +900,7 @@
   int n, ix;
 
   /* High word of x. */
-  ix = __HI(x);
+  ix = high(x);
 
   /* |x| ~< pi/4 */
   ix &= 0x7fffffff;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/stubRoutines.cpp
--- a/src/share/vm/runtime/stubRoutines.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/stubRoutines.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -125,6 +125,13 @@
 address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
 address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
 
+address StubRoutines::_sha1_implCompress     = NULL;
+address StubRoutines::_sha1_implCompressMB   = NULL;
+address StubRoutines::_sha256_implCompress   = NULL;
+address StubRoutines::_sha256_implCompressMB = NULL;
+address StubRoutines::_sha512_implCompress   = NULL;
+address StubRoutines::_sha512_implCompressMB = NULL;
+
 address StubRoutines::_updateBytesCRC32 = NULL;
 address StubRoutines::_crc_table_adr = NULL;
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/stubRoutines.hpp
--- a/src/share/vm/runtime/stubRoutines.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/stubRoutines.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -207,6 +207,13 @@
   static address _cipherBlockChaining_encryptAESCrypt;
   static address _cipherBlockChaining_decryptAESCrypt;
 
+  static address _sha1_implCompress;
+  static address _sha1_implCompressMB;
+  static address _sha256_implCompress;
+  static address _sha256_implCompressMB;
+  static address _sha512_implCompress;
+  static address _sha512_implCompressMB;
+
   static address _updateBytesCRC32;
   static address _crc_table_adr;
 
@@ -356,6 +363,13 @@
   static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
 
+  static address sha1_implCompress()     { return _sha1_implCompress; }
+  static address sha1_implCompressMB()   { return _sha1_implCompressMB; }
+  static address sha256_implCompress()   { return _sha256_implCompress; }
+  static address sha256_implCompressMB() { return _sha256_implCompressMB; }
+  static address sha512_implCompress()   { return _sha512_implCompress; }
+  static address sha512_implCompressMB() { return _sha512_implCompressMB; }
+
   static address updateBytesCRC32()    { return _updateBytesCRC32; }
   static address crc_table_addr()      { return _crc_table_adr; }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/sweeper.cpp
--- a/src/share/vm/runtime/sweeper.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/sweeper.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,8 +33,10 @@
 #include "runtime/atomic.hpp"
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/sweeper.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vm_operations.hpp"
 #include "trace/tracing.hpp"
 #include "utilities/events.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/thread.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -58,6 +58,7 @@
 #include "runtime/memprofiler.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "runtime/osThread.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -833,7 +834,7 @@
   return false;
 }
 
-void Thread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void Thread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   active_handles()->oops_do(f);
   // Do oop for ThreadShadow
   f->do_oop((oop*)&_pending_exception);
@@ -2729,7 +2730,7 @@
   }
 };
 
-void JavaThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void JavaThread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   // Verify that the deferred card marks have been flushed.
   assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
@@ -3252,7 +3253,7 @@
 #endif
 }
 
-void CompilerThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void CompilerThread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   JavaThread::oops_do(f, cld_f, cf);
   if (_scanned_nmethod != NULL && cf != NULL) {
     // Safepoints can occur when the sweeper is scanning an nmethod so
@@ -4166,22 +4167,22 @@
 // uses the Threads_lock to gurantee this property. It also makes sure that
 // all threads gets blocked when exiting or starting).
 
-void Threads::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void Threads::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   ALL_JAVA_THREADS(p) {
     p->oops_do(f, cld_f, cf);
   }
   VMThread::vm_thread()->oops_do(f, cld_f, cf);
 }
 
-void Threads::possibly_parallel_oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void Threads::possibly_parallel_oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   // Introduce a mechanism allowing parallel threads to claim threads as
   // root groups.  Overhead should be small enough to use all the time,
   // even in sequential code.
   SharedHeap* sh = SharedHeap::heap();
   // Cannot yet substitute active_workers for n_par_threads
   // because of G1CollectedHeap::verify() use of
-  // SharedHeap::process_strong_roots().  n_par_threads == 0 will
-  // turn off parallelism in process_strong_roots while active_workers
+  // SharedHeap::process_roots().  n_par_threads == 0 will
+  // turn off parallelism in process_roots while active_workers
   // is being used for parallelism elsewhere.
   bool is_par = sh->n_par_threads() > 0;
   assert(!is_par ||
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/thread.hpp
--- a/src/share/vm/runtime/thread.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/thread.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -436,18 +436,7 @@
   jlong allocated_bytes()               { return _allocated_bytes; }
   void set_allocated_bytes(jlong value) { _allocated_bytes = value; }
   void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
-  jlong cooked_allocated_bytes() {
-    jlong allocated_bytes = OrderAccess::load_acquire(&_allocated_bytes);
-    if (UseTLAB) {
-      size_t used_bytes = tlab().used_bytes();
-      if ((ssize_t)used_bytes > 0) {
-        // More-or-less valid tlab.  The load_acquire above should ensure
-        // that the result of the add is <= the instantaneous value
-        return allocated_bytes + used_bytes;
-      }
-    }
-    return allocated_bytes;
-  }
+  inline jlong cooked_allocated_bytes();
 
   TRACE_DATA* trace_data()              { return &_trace_data; }
 
@@ -483,13 +472,13 @@
   // Apply "cld_f->do_cld" to CLDs that are otherwise not kept alive.
   //   Used by JavaThread::oops_do.
   // Apply "cf->do_code_blob" (if !NULL) to all code blobs active in frames
-  virtual void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  virtual void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
   // Handles the parallel case for the method below.
 private:
   bool claim_oops_do_par_case(int collection_parity);
 public:
-  // Requires that "collection_parity" is that of the current strong roots
+  // Requires that "collection_parity" is that of the current roots
   // iteration.  If "is_par" is false, sets the parity of "this" to
   // "collection_parity", and returns "true".  If "is_par" is true,
   // uses an atomic instruction to set the current threads parity to
@@ -1046,12 +1035,8 @@
 #else
   // Use membars when accessing volatile _thread_state. See
   // Threads::create_vm() for size checks.
-  JavaThreadState thread_state() const           {
-    return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state);
-  }
-  void set_thread_state(JavaThreadState s)       {
-    OrderAccess::release_store((volatile jint*)&_thread_state, (jint)s);
-  }
+  inline JavaThreadState thread_state() const;
+  inline void set_thread_state(JavaThreadState s);
 #endif
   ThreadSafepointState *safepoint_state() const  { return _safepoint_state; }
   void set_safepoint_state(ThreadSafepointState *state) { _safepoint_state = state; }
@@ -1444,7 +1429,7 @@
   void frames_do(void f(frame*, const RegisterMap*));
 
   // Memory operations
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
   // Sweeper operations
   void nmethods_do(CodeBlobClosure* cf);
@@ -1775,7 +1760,7 @@
   // clearing/querying jni attach status
   bool is_attaching_via_jni() const { return _jni_attach_state == _attaching_via_jni; }
   bool has_attached_via_jni() const { return is_attaching_via_jni() || _jni_attach_state == _attached_via_jni; }
-  void set_done_attaching_via_jni() { _jni_attach_state = _attached_via_jni; OrderAccess::fence(); }
+  inline void set_done_attaching_via_jni();
 private:
   // This field is used to determine if a thread has claimed
   // a par_id: it is UINT_MAX if the thread has not claimed a par_id;
@@ -1875,7 +1860,7 @@
   // GC support
   // Apply "f->do_oop" to all root oops in "this".
   // Apply "cf->do_code_blob" (if !NULL) to all code blobs active in frames
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
 #ifndef PRODUCT
 private:
@@ -1942,9 +1927,9 @@
 
   // Apply "f->do_oop" to all root oops in all threads.
   // This version may only be called by sequential code.
-  static void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  static void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
   // This version may be called by sequential or parallel code.
-  static void possibly_parallel_oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  static void possibly_parallel_oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
   // This creates a list of GCTasks, one per thread.
   static void create_thread_roots_tasks(GCTaskQueue* q);
   // This creates a list of GCTasks, one per thread, for marking objects.
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/thread.inline.hpp
--- a/src/share/vm/runtime/thread.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/thread.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -46,4 +46,32 @@
 
 #undef SHARE_VM_RUNTIME_THREAD_INLINE_HPP_SCOPE
 
+inline jlong Thread::cooked_allocated_bytes() {  // _allocated_bytes plus the current TLAB's used bytes, when that reading is usable
+  jlong allocated_bytes = OrderAccess::load_acquire(&_allocated_bytes);
+  if (UseTLAB) {
+    size_t used_bytes = tlab().used_bytes();
+    if ((ssize_t)used_bytes > 0) {  // ignore a reading that is zero, or negative when viewed as ssize_t
+      // More-or-less valid tlab. The load_acquire above should ensure
+      // that the result of the add is <= the instantaneous value.
+      return allocated_bytes + used_bytes;
+    }
+  }
+  return allocated_bytes;  // UseTLAB is off or the TLAB reading was rejected: recorded counter only
+}
+
+#ifdef PPC64  // ordered accessors for _thread_state are only defined here for PPC64
+inline JavaThreadState JavaThread::thread_state() const    {  // acquire-load of _thread_state, read as a jint
+  return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state);
+}
+
+inline void JavaThread::set_thread_state(JavaThreadState s) {  // release-store of s, written as a jint
+  OrderAccess::release_store((volatile jint*)&_thread_state, (jint)s);
+}
+#endif // PPC64
+
+inline void JavaThread::set_done_attaching_via_jni() {  // records _attached_via_jni, then issues a full fence
+  _jni_attach_state = _attached_via_jni;
+  OrderAccess::fence();  // placed after the store, matching the statement order of the former in-class definition
+}
+
 #endif // SHARE_VM_RUNTIME_THREAD_INLINE_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/vmThread.cpp
--- a/src/share/vm/runtime/vmThread.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/vmThread.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -682,7 +682,7 @@
 }
 
 
-void VMThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void VMThread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   Thread::oops_do(f, cld_f, cf);
   _vm_queue->oops_do(f);
 }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/runtime/vmThread.hpp
--- a/src/share/vm/runtime/vmThread.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/runtime/vmThread.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -126,7 +126,7 @@
   static VMThread* vm_thread()                    { return _vm_thread; }
 
   // GC support
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
   // Debugging
   void print_on(outputStream* st) const;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/services/management.cpp
--- a/src/share/vm/services/management.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/services/management.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -39,6 +39,7 @@
 #include "runtime/jniHandles.hpp"
 #include "runtime/os.hpp"
 #include "runtime/serviceThread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "services/classLoadingService.hpp"
 #include "services/diagnosticCommand.hpp"
 #include "services/diagnosticFramework.hpp"
@@ -1823,7 +1824,7 @@
               "This flag is not writeable.");
   }
 
-  bool succeed;
+  bool succeed = false;
   if (flag->is_bool()) {
     bool bvalue = (new_value.z == JNI_TRUE ? true : false);
     succeed = CommandLineFlags::boolAtPut(name, &bvalue, Flag::MANAGEMENT);
@@ -1855,6 +1856,9 @@
     }
     ccstr svalue = java_lang_String::as_utf8_string(str);
     succeed = CommandLineFlags::ccstrAtPut(name, &svalue, Flag::MANAGEMENT);
+    if (succeed) {
+      FREE_C_HEAP_ARRAY(char, svalue, mtInternal);
+    }
   }
   assert(succeed, "Setting flag should succeed");
 JVM_END
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/services/memTracker.cpp
--- a/src/share/vm/services/memTracker.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/services/memTracker.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,6 +29,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/threadCritical.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/memPtr.hpp"
 #include "services/memReporter.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/services/memoryManager.cpp
--- a/src/share/vm/services/memoryManager.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/services/memoryManager.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "services/lowMemoryDetector.hpp"
 #include "services/management.hpp"
 #include "services/memoryManager.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/services/memoryPool.cpp
--- a/src/share/vm/services/memoryPool.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/services/memoryPool.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -29,6 +29,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "services/lowMemoryDetector.hpp"
 #include "services/management.hpp"
 #include "services/memoryManager.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/services/threadService.cpp
--- a/src/share/vm/services/threadService.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/services/threadService.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -33,6 +33,7 @@
 #include "runtime/init.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/vframe.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/threadService.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/trace/tracetypes.xml
--- a/src/share/vm/trace/tracetypes.xml	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/trace/tracetypes.xml	Tue Aug 19 20:41:28 2014 +0100
@@ -98,6 +98,7 @@
       <value type="SYMBOL" field="name" label="Name"/>
       <value type="SYMBOL" field="signature" label="Signature"/>
       <value type="SHORT" field="modifiers" label="Access modifiers"/>
+      <value type="BOOLEAN" field="hidden" label="Hidden"/>
     </content_type>
 
     <content_type id="UTFConstant" hr_name="UTF constant"
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/array.hpp
--- a/src/share/vm/utilities/array.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/array.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/metaspace.hpp"
+#include "runtime/orderAccess.hpp"
 
 // correct linkage required to compile w/o warnings
 // (must be on file level - cannot be local)
@@ -304,6 +305,7 @@
   friend class MetadataFactory;
   friend class VMStructs;
   friend class MethodHandleCompiler;           // special case
+  friend class WhiteBox;
 protected:
   int _length;                                 // the number of array elements
   T   _data[1];                                // the array memory
@@ -325,6 +327,31 @@
 
   static size_t byte_sizeof(int length) { return sizeof(Array<T>) + MAX2(length - 1, 0) * sizeof(T); }
 
+  // WhiteBox API helper: maps an allocation size in bytes back to an element count.
+  // Sizes at or below sizeof(Array<T>) always yield 0, so an array of length 0
+  // cannot be told apart from one of length 1.
+  static int bytes_to_length(size_t bytes)       {
+    assert(is_size_aligned(bytes, BytesPerWord), "Must be, for now");
+
+    if (sizeof(Array<T>) >= bytes) {
+      return 0;
+    }
+
+    size_t left = bytes - sizeof(Array<T>);  // bytes beyond the object itself, which already embeds _data[1]
+    assert(is_size_aligned(left, sizeof(T)), "Must be");
+
+    size_t elements = left / sizeof(T);
+    assert(elements <= (size_t)INT_MAX, err_msg("number of elements " SIZE_FORMAT " doesn't fit into an int.", elements));
+
+    int length = (int)elements;
+
+    assert((size_t)size(length) * BytesPerWord == bytes,
+        err_msg("Expected: " SIZE_FORMAT " got: " SIZE_FORMAT,
+                bytes, (size_t)size(length) * BytesPerWord));  // round-trip check against size()
+
+    return length;
+  }
+
   explicit Array(int length) : _length(length) {
     assert(length >= 0, "illegal length");
   }
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/globalDefinitions.hpp
--- a/src/share/vm/utilities/globalDefinitions.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -558,6 +558,27 @@
   return fabs(value);
 }
 
+//----------------------------------------------------------------------------------------------------
+// Special casts
+// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
+typedef union {
+  jfloat f;
+  jint i;
+} FloatIntConv;
+
+typedef union {
+  jdouble d;
+  jlong l;
+  julong ul;
+} DoubleLongConv;
+
+inline jint    jint_cast    (jfloat  x)  { return ((FloatIntConv*)&x)->i; }
+inline jfloat  jfloat_cast  (jint    x)  { return ((FloatIntConv*)&x)->f; }
+
+inline jlong   jlong_cast   (jdouble x)  { return ((DoubleLongConv*)&x)->l;  }
+inline julong  julong_cast  (jdouble x)  { return ((DoubleLongConv*)&x)->ul; }
+inline jdouble jdouble_cast (jlong   x)  { return ((DoubleLongConv*)&x)->d;  }
+
 inline jint low (jlong value)                    { return jint(value); }
 inline jint high(jlong value)                    { return jint(value >> 32); }
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/globalDefinitions_gcc.hpp
--- a/src/share/vm/utilities/globalDefinitions_gcc.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/globalDefinitions_gcc.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -167,17 +167,6 @@
 typedef uint32_t juint;
 typedef uint64_t julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-// %%%%%% These seem like standard C++ to me--how about factoring them out? - Ungar
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-inline julong  julong_cast (jdouble x)           { return *(julong* )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
 
 //----------------------------------------------------------------------------------------------------
 // Constant for jlong (specifying an long long canstant is C++ compiler specific)
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/globalDefinitions_sparcWorks.hpp
--- a/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -183,15 +183,6 @@
 typedef unsigned int       juint;
 typedef unsigned long long julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
 
 //----------------------------------------------------------------------------------------------------
 // Constant for jlong (specifying an long long constant is C++ compiler specific)
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/globalDefinitions_visCPP.hpp
--- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -116,16 +116,6 @@
 typedef unsigned int     juint;
 typedef unsigned __int64 julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
-
 
 //----------------------------------------------------------------------------------------------------
 // Non-standard stdlib-like stuff:
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/globalDefinitions_xlc.hpp
--- a/src/share/vm/utilities/globalDefinitions_xlc.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/globalDefinitions_xlc.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -114,16 +114,6 @@
 typedef uint32_t juint;
 typedef uint64_t julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-// %%%%%% These seem like standard C++ to me--how about factoring them out? - Ungar
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
 
 //----------------------------------------------------------------------------------------------------
 // Constant for jlong (specifying an long long canstant is C++ compiler specific)
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/growableArray.hpp
--- a/src/share/vm/utilities/growableArray.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/growableArray.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -147,6 +147,9 @@
   }
 };
 
+template<class E> class GrowableArrayIterator;
+template<class E, class UnaryPredicate> class GrowableArrayFilterIterator;
+
 template<class E> class GrowableArray : public GenericGrowableArray {
   friend class VMStructs;
 
@@ -243,6 +246,14 @@
     return _data[_len-1];
   }
 
+  GrowableArrayIterator<E> begin() const {
+    return GrowableArrayIterator<E>(this, 0);
+  }
+
+  GrowableArrayIterator<E> end() const {
+    return GrowableArrayIterator<E>(this, length());
+  }
+
   void push(const E& elem) { append(elem); }
 
   E pop() {
@@ -412,4 +423,83 @@
     tty->print("}\n");
 }
 
+// Custom STL-style forward iterator over the elements of a GrowableArray.
+// It is constructed by invoking GrowableArray::begin() and GrowableArray::end()
+template<class E> class GrowableArrayIterator : public StackObj {
+  friend class GrowableArray<E>;
+  template<class F, class UnaryPredicate> friend class GrowableArrayFilterIterator;  // reads _array/_position in its constructor
+
+ private:
+  const GrowableArray<E>* _array; // GrowableArray we iterate over
+  int _position;                  // The current position in the GrowableArray
+
+  // Private constructor used in GrowableArray::begin() and GrowableArray::end()
+  GrowableArrayIterator(const GrowableArray<E>* array, int position) : _array(array), _position(position) {
+    assert(0 <= position && position <= _array->length(), "illegal position");  // position == length() is the end() iterator
+  }
+
+ public:
+  GrowableArrayIterator<E>& operator++()  { ++_position; return *this; }  // pre-increment only; no bounds check here
+  E operator*()                           { return _array->at(_position); }  // returns the element by value (E, not E&)
+
+  bool operator==(const GrowableArrayIterator<E>& rhs)  {  // equal when both positions match
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position == rhs._position;
+  }
+
+  bool operator!=(const GrowableArrayIterator<E>& rhs)  {  // negation of operator==, same array check
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position != rhs._position;
+  }
+};
+
+// Custom STL-style iterator that visits only the GrowableArray elements for which a given unary predicate returns true
+template<class E, class UnaryPredicate> class GrowableArrayFilterIterator : public StackObj {
+  friend class GrowableArray<E>;
+
+ private:
+  const GrowableArray<E>* _array;   // GrowableArray we iterate over
+  int _position;                    // Current position in the GrowableArray
+  UnaryPredicate _predicate;        // Unary predicate the elements of the GrowableArray should satisfy
+
+ public:
+  GrowableArrayFilterIterator(const GrowableArrayIterator<E>& begin, UnaryPredicate filter_predicate)  // predicate is taken and stored by value
+   : _array(begin._array), _position(begin._position), _predicate(filter_predicate) {
+    // Advance to first element satisfying the predicate
+    while(_position != _array->length() && !_predicate(_array->at(_position))) {
+      ++_position;
+    }
+  }
+
+  GrowableArrayFilterIterator<E, UnaryPredicate>& operator++() {
+    do {  // do-while: always steps at least once, then skips rejected elements
+      // Advance to next element satisfying the predicate
+      ++_position;
+    } while(_position != _array->length() && !_predicate(_array->at(_position)));  // stops at length(), i.e. the end position
+    return *this;
+  }
+
+  E operator*()   { return _array->at(_position); }  // returns the element by value (E, not E&)
+
+  bool operator==(const GrowableArrayIterator<E>& rhs)  {  // comparison with a plain iterator, the type GrowableArray::end() returns
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position == rhs._position;
+  }
+
+  bool operator!=(const GrowableArrayIterator<E>& rhs)  {  // plain-iterator overload, e.g. for "it != array->end()"
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position != rhs._position;
+  }
+
+  bool operator==(const GrowableArrayFilterIterator<E, UnaryPredicate>& rhs)  {  // filter-to-filter comparison by position
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position == rhs._position;
+  }
+
+  bool operator!=(const GrowableArrayFilterIterator<E, UnaryPredicate>& rhs)  {  // negation of the overload above
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position != rhs._position;
+  }
+};
+
 #endif // SHARE_VM_UTILITIES_GROWABLEARRAY_HPP
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/hashtable.inline.hpp
--- a/src/share/vm/utilities/hashtable.inline.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/hashtable.inline.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -26,6 +26,7 @@
 #define SHARE_VM_UTILITIES_HASHTABLE_INLINE_HPP
 
 #include "memory/allocation.inline.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "utilities/hashtable.hpp"
 #include "utilities/dtrace.hpp"
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/ostream.cpp
--- a/src/share/vm/utilities/ostream.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/ostream.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "compiler/compileLog.hpp"
+#include "gc_implementation/shared/gcId.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/arguments.hpp"
 #include "utilities/defaultStream.hpp"
@@ -240,6 +241,14 @@
   return;
 }
 
+void outputStream::gclog_stamp(const GCId& gc_id) {  // GC log prefix: date stamp, time stamp, then the GC id
+  date_stamp(PrintGCDateStamps);
+  stamp(PrintGCTimeStamps);
+  if (PrintGCID) {
+    print("#%u: ", gc_id.id());  // printed as "#<id>: " only when PrintGCID is set
+  }
+}
+
 outputStream& outputStream::indent() {
   while (_position < _indentation) sp();
   return *this;
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/ostream.hpp
--- a/src/share/vm/utilities/ostream.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/ostream.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,6 +28,7 @@
 #include "memory/allocation.hpp"
 #include "runtime/timer.hpp"
 
+class GCId;
 DEBUG_ONLY(class ResourceMark;)
 
 // Output streams for printing
@@ -107,6 +108,7 @@
    void date_stamp(bool guard) {
      date_stamp(guard, "", ": ");
    }
+   void gclog_stamp(const GCId& gc_id);
 
    // portable printing of 64 bit integers
    void print_jlong(jlong value);
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/taskqueue.hpp
--- a/src/share/vm/utilities/taskqueue.hpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/taskqueue.hpp	Tue Aug 19 20:41:28 2014 +0100
@@ -28,40 +28,8 @@
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 #include "runtime/mutex.hpp"
+#include "runtime/orderAccess.inline.hpp"
 #include "utilities/stack.hpp"
-#ifdef TARGET_OS_ARCH_linux_x86
-# include "orderAccess_linux_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_sparc
-# include "orderAccess_linux_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_zero
-# include "orderAccess_linux_zero.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_x86
-# include "orderAccess_solaris_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_solaris_sparc
-# include "orderAccess_solaris_sparc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_windows_x86
-# include "orderAccess_windows_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_arm
-# include "orderAccess_linux_arm.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_linux_ppc
-# include "orderAccess_linux_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_aix_ppc
-# include "orderAccess_aix_ppc.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_x86
-# include "orderAccess_bsd_x86.inline.hpp"
-#endif
-#ifdef TARGET_OS_ARCH_bsd_zero
-# include "orderAccess_bsd_zero.inline.hpp"
-#endif
 
 // Simple TaskQueue stats that are collected by default in debug builds.
 
diff -r f06c7b654d63 -r 03c5d509a811 src/share/vm/utilities/vmError.cpp
--- a/src/share/vm/utilities/vmError.cpp	Thu Jul 31 09:58:53 2014 +0100
+++ b/src/share/vm/utilities/vmError.cpp	Tue Aug 19 20:41:28 2014 +0100
@@ -30,7 +30,7 @@
 #include "runtime/frame.inline.hpp"
 #include "runtime/init.hpp"
 #include "runtime/os.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 #include "runtime/vm_operations.hpp"
 #include "services/memTracker.hpp"
diff -r f06c7b654d63 -r 03c5d509a811 test/Makefile
--- a/test/Makefile	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/Makefile	Tue Aug 19 20:41:28 2014 +0100
@@ -147,6 +147,11 @@
 all: jtreg_tests
 	@$(ECHO) "Testing completed successfully"
 
+# Support "hotspot_" prefixed test make targets too
+# The hotspot_% targets are for example invoked by the top level Makefile
+hotspot_%:
+	$(MAKE) $*
+
 # Prep for output
 prep: clean
 	@$(MKDIR) -p $(ABS_TEST_OUTPUT_DIR)
diff -r f06c7b654d63 -r 03c5d509a811 test/TEST.groups
--- a/test/TEST.groups	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/TEST.groups	Tue Aug 19 20:41:28 2014 +0100
@@ -169,6 +169,8 @@
 #
 needs_full_vm_compact1 = \
   runtime/NMT \
+  gc/class_unloading/TestCMSClassUnloadingDisabledHWM.java \
+  gc/class_unloading/TestG1ClassUnloadingHWM.java \
   gc/g1/TestRegionAlignment.java \
   gc/g1/TestShrinkToOneRegion.java \
   gc/metaspace/G1AddMetaspaceDependency.java \
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/classUnloading/methodUnloading/TestMethodUnloading.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/classUnloading/methodUnloading/TestMethodUnloading.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import sun.hotspot.WhiteBox;
+
+import java.lang.reflect.Method;
+import java.net.URL;
+import java.net.URLClassLoader;
+
+/*
+ * @test MethodUnloadingTest
+ * @bug 8029443
+ * @summary "Tests the unloading of methods due to class unloading"
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestMethodUnloading
+ * @build WorkerClass
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-BackgroundCompilation -XX:-UseCompressedOops -XX:+UseParallelGC -XX:CompileOnly=TestMethodUnloading::doWork TestMethodUnloading
+ */
+public class TestMethodUnloading {
+    private static final String workerClassName = "WorkerClass";
+    private static int work = -1;
+
+    private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+    private static int COMP_LEVEL_SIMPLE = 1;
+    private static int COMP_LEVEL_FULL_OPTIMIZATION = 4;
+
+    /**
+     * Does some work via workerClass (failing unless it yields 42) or by producing a value locally.
+     * @param workerClass Class performing some work (will be unloaded)
+     * @param useWorker If true the workerClass is used
+     */
+    static private void doWork(Class<?> workerClass, boolean useWorker) throws InstantiationException, IllegalAccessException {
+        if (useWorker) {
+            // Create a new instance
+            Object worker = workerClass.newInstance();
+            // We would like to call a method of WorkerClass here but we cannot cast to WorkerClass
+            // because the class was loaded by a different class loader. One solution would be to use
+            // reflection but since we want C2 to implement the call as an optimized IC we call
+            // Object::hashCode() here which actually calls WorkerClass::hashCode().
+            // C2 will then implement this call as an optimized IC that points to a to-interpreter stub
+            // referencing the Method* for WorkerClass::hashCode().
+            work = worker.hashCode();
+            if (work != 42) {
+                throw new RuntimeException("Work not done");
+            }
+        } else {
+            // Do some important work here
+            work = 1;
+        }
+    }
+
+    /**
+     * Makes sure that method is compiled by forcing compilation if not yet compiled.
+     * @param m Method to be checked
+     */
+    static private void makeSureIsCompiled(Method m) {
+        // Make sure background compilation is disabled
+        if (WHITE_BOX.getBooleanVMFlag("BackgroundCompilation")) {
+            throw new RuntimeException("Background compilation enabled");
+        }
+
+        // Check if already compiled
+        if (!WHITE_BOX.isMethodCompiled(m)) {
+            // If not, try to compile it with C2
+            if(!WHITE_BOX.enqueueMethodForCompilation(m, COMP_LEVEL_FULL_OPTIMIZATION)) {
+                // C2 compiler not available, try to compile with C1
+                WHITE_BOX.enqueueMethodForCompilation(m, COMP_LEVEL_SIMPLE);
+            }
+            // Because background compilation is disabled, method should now be compiled
+            if(!WHITE_BOX.isMethodCompiled(m)) {
+                throw new RuntimeException(m + " not compiled");
+            }
+        }
+    }
+
+    /**
+     * This test creates stale Method* metadata in a to-interpreter stub of an optimized IC.
+     *
+     * The following steps are performed:
+     * (1) A workerClass is loaded by a custom class loader
+     * (2) The method doWork that calls a method of the workerClass is compiled. The call
+     *     is implemented as an optimized IC calling a to-interpreter stub. The to-interpreter
+     *     stub contains a Method* to a workerClass method.
+     * (3) Unloading of the workerClass is enforced. The to-interpreter stub now contains a dead Method*.
+     * (4) Depending on the implementation of the IC, the compiled version of doWork should still be
+     *     valid. We call it again without using the workerClass.
+     */
+    static public void main(String[] args) throws Exception {
+        // (1) Create a custom class loader with no parent class loader
+        URL url = TestMethodUnloading.class.getProtectionDomain().getCodeSource().getLocation();
+        URLClassLoader loader = new URLClassLoader(new URL[] {url}, null);
+
+        // Load worker class with custom class loader
+        Class<?> workerClass = Class.forName(workerClassName, true, loader);
+
+        // (2) Make sure all paths of doWork are profiled and compiled
+        for (int i = 0; i < 100000; ++i) {
+            doWork(workerClass, true);
+            doWork(workerClass, false);
+        }
+
+        // Make sure doWork is compiled now
+        Method doWork = TestMethodUnloading.class.getDeclaredMethod("doWork", Class.class, boolean.class);
+        makeSureIsCompiled(doWork);
+
+        // (3) Throw away class loader and reference to workerClass to allow unloading
+        loader.close();
+        loader = null;
+        workerClass = null;
+
+        // Force garbage collection to trigger unloading of workerClass
+        // Dead reference to WorkerClass::hashCode triggers JDK-8029443
+        WHITE_BOX.fullGC();
+
+        // (4) Depending on the implementation of the IC, the compiled version of doWork
+        // may still be valid here. Execute it without a workerClass.
+        doWork(null, false);
+        if (work != 1) {
+            throw new RuntimeException("Work not done");
+        }
+
+        doWork(Object.class, false);
+    }
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/classUnloading/methodUnloading/WorkerClass.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/classUnloading/methodUnloading/WorkerClass.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * Worker class that is dynamically loaded/unloaded by TestMethodUnloading.
+ */
+public class WorkerClass {
+    /**
+     * We override hashCode here to be able to access this implementation
+     * via an Object reference (we cannot cast to WorkerClass).
+     */
+    @Override
+    public int hashCode() {
+        return 42;
+    }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/intrinsics/sha/TestSHA.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/sha/TestSHA.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8035968
+ * @summary C2 support for SHA on SPARC
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 TestSHA
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 -Doffset=1 TestSHA
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=SHA-256 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=SHA-512 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Dalgorithm2=SHA-512 TestSHA
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=MD5     TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=MD5     -Dalgorithm2=SHA-1   TestSHA
+ */
+
+import java.security.MessageDigest;
+import java.util.Arrays;
+
+public class TestSHA {
+    private static final int HASH_LEN = 64; /* up to 512-bit */
+    private static final int ALIGN = 8;     /* for different data alignments */
+
+    public static void main(String[] args) throws Exception {
+        String provider = System.getProperty("provider", "SUN");
+        String algorithm = System.getProperty("algorithm", "SHA-1");
+        String algorithm2 = System.getProperty("algorithm2", "");
+        int msgSize = Integer.getInteger("msgSize", 1024);
+        int offset = Integer.getInteger("offset", 0)  % ALIGN;
+        int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 100000);
+        int warmupIters = (args.length > 1 ? Integer.valueOf(args[1]) : 20000);
+
+        testSHA(provider, algorithm, msgSize, offset, iters, warmupIters);
+
+        if (algorithm2.equals("") == false) {
+            testSHA(provider, algorithm2, msgSize, offset, iters, warmupIters);
+        }
+    }
+
+    static void testSHA(String provider, String algorithm, int msgSize,
+                        int offset, int iters, int warmupIters) throws Exception {
+        System.out.println("provider = " + provider);
+        System.out.println("algorithm = " + algorithm);
+        System.out.println("msgSize = " + msgSize + " bytes");
+        System.out.println("offset = " + offset);
+        System.out.println("iters = " + iters);
+
+        byte[] expectedHash = new byte[HASH_LEN];
+        byte[] hash = new byte[HASH_LEN];
+        byte[] data = new byte[msgSize + offset];
+        for (int i = 0; i < (msgSize + offset); i++) {
+            data[i] = (byte)(i & 0xff);
+        }
+
+        try {
+            MessageDigest sha = MessageDigest.getInstance(algorithm, provider);
+
+            /* do once, which doesn't use intrinsics */
+            sha.reset();
+            sha.update(data, offset, msgSize);
+            expectedHash = sha.digest();
+
+            /* warm up */
+            for (int i = 0; i < warmupIters; i++) {
+                sha.reset();
+                sha.update(data, offset, msgSize);
+                hash = sha.digest();
+            }
+
+            /* check result */
+            if (Arrays.equals(hash, expectedHash) == false) {
+                System.out.println("TestSHA Error: ");
+                showArray(expectedHash, "expectedHash");
+                showArray(hash,         "computedHash");
+                //System.exit(1);
+                throw new Exception("TestSHA Error");
+            } else {
+                showArray(hash, "hash");
+            }
+
+            /* measure performance */
+            long start = System.nanoTime();
+            for (int i = 0; i < iters; i++) {
+                sha.reset();
+                sha.update(data, offset, msgSize);
+                hash = sha.digest();
+            }
+            long end = System.nanoTime();
+            double total = (double)(end - start)/1e9;         /* in seconds */
+            double thruput = (double)msgSize*iters/1e6/total; /* in MB/s */
+            System.out.println("TestSHA runtime = " + total + " seconds");
+            System.out.println("TestSHA throughput = " + thruput + " MB/s");
+            System.out.println();
+        } catch (Exception e) {
+            System.out.println("Exception: " + e);
+            //System.exit(1);
+            throw new Exception(e);
+        }
+    }
+
+    static void showArray(byte b[], String name) {
+        System.out.format("%s [%d]: ", name, b.length);
+        for (int i = 0; i < Math.min(b.length, HASH_LEN); i++) {
+            System.out.format("%02x ", b[i] & 0xff);
+        }
+        System.out.println();
+    }
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/osr/TestOSRWithNonEmptyStack.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/osr/TestOSRWithNonEmptyStack.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+import jdk.internal.org.objectweb.asm.ClassWriter;
+import jdk.internal.org.objectweb.asm.Label;
+import jdk.internal.org.objectweb.asm.MethodVisitor;
+import static jdk.internal.org.objectweb.asm.Opcodes.*;
+
+/**
+ * @test
+ * @bug 8051344
+ * @summary Force OSR compilation with non-empty stack at the OSR entry point.
+ * @compile -XDignore.symbol.file TestOSRWithNonEmptyStack.java
+ * @run main/othervm -XX:CompileOnly=TestCase.test TestOSRWithNonEmptyStack
+ */
+public class TestOSRWithNonEmptyStack extends ClassLoader {
+    private static final int CLASS_FILE_VERSION = 52;
+    private static final String CLASS_NAME = "TestCase";
+    private static final String METHOD_NAME = "test";
+    private static final int ITERATIONS = 1_000_000;
+
+    private static byte[] generateTestClass() {
+        ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES);
+
+        cw.visit(TestOSRWithNonEmptyStack.CLASS_FILE_VERSION, ACC_PUBLIC,
+                TestOSRWithNonEmptyStack.CLASS_NAME, null, "java/lang/Object",
+                null);
+
+        TestOSRWithNonEmptyStack.generateConstructor(cw);
+        TestOSRWithNonEmptyStack.generateTestMethod(cw);
+
+        cw.visitEnd();
+        return cw.toByteArray();
+    }
+
+    private static void generateConstructor(ClassWriter classWriter) {
+        MethodVisitor mv = classWriter.visitMethod(ACC_PUBLIC, "<init>", "()V",
+                null, null);
+
+        mv.visitCode();
+
+        mv.visitVarInsn(ALOAD, 0);
+        mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V",
+                false);
+        mv.visitInsn(RETURN);
+
+        mv.visitMaxs(0, 0);
+        mv.visitEnd();
+    }
+
+    private static void generateTestMethod(ClassWriter classWriter) {
+        MethodVisitor mv = classWriter.visitMethod(ACC_PUBLIC,
+                TestOSRWithNonEmptyStack.METHOD_NAME, "()V", null, null);
+        Label osrEntryPoint = new Label();
+
+        mv.visitCode();
+        // Push 'this' into stack before OSR entry point to bail out compilation
+        mv.visitVarInsn(ALOAD, 0);
+        // Setup loop counter
+        mv.visitInsn(ICONST_0);
+        mv.visitVarInsn(ISTORE, 1);
+        // Begin loop
+        mv.visitLabel(osrEntryPoint);
+        // Increment loop counter
+        mv.visitVarInsn(ILOAD, 1);
+        mv.visitInsn(ICONST_1);
+        mv.visitInsn(IADD);
+        // Duplicate it for loop condition check
+        mv.visitInsn(DUP);
+        mv.visitVarInsn(ISTORE, 1);
+        // Check loop condition
+        mv.visitLdcInsn(TestOSRWithNonEmptyStack.ITERATIONS);
+        mv.visitJumpInsn(IF_ICMPLT, osrEntryPoint);
+        // Pop 'this'.
+        mv.visitInsn(POP);
+        mv.visitInsn(RETURN);
+
+        mv.visitMaxs(0, 0);
+        mv.visitEnd();
+    }
+
+    private void run() {
+        byte[] bytecode = TestOSRWithNonEmptyStack.generateTestClass();
+
+        try {
+            Class klass = defineClass(TestOSRWithNonEmptyStack.CLASS_NAME,
+                    bytecode, 0, bytecode.length);
+
+            Constructor ctor = klass.getConstructor();
+            Method method = klass.getDeclaredMethod(
+                    TestOSRWithNonEmptyStack.METHOD_NAME);
+
+            Object testCase = ctor.newInstance();
+            method.invoke(testCase);
+        } catch (Exception e) {
+            throw new RuntimeException(
+                    "Test bug: generated class should be valid.", e);
+        }
+    }
+
+    public static void main(String args[]) {
+        new TestOSRWithNonEmptyStack().run();
+    }
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/rtm/cli/TestRTMRetryCountOption.java
--- a/test/compiler/rtm/cli/TestRTMRetryCountOption.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/compiler/rtm/cli/TestRTMRetryCountOption.java	Tue Aug 19 20:41:28 2014 +0100
@@ -35,7 +35,7 @@
     private static final String DEFAULT_VALUE = "5";
 
     private TestRTMRetryCountOption() {
-        super(Boolean.TRUE::booleanValue, "RTMRetryCount", false, true,
+        super(Boolean.TRUE::booleanValue, "RTMRetryCount", false, false,
                 TestRTMRetryCountOption.DEFAULT_VALUE,
                 "0", "10", "100", "1000");
     }
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/rtm/cli/TestUseRTMDeoptOptionOnSupportedConfig.java
--- a/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnSupportedConfig.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnSupportedConfig.java	Tue Aug 19 20:41:28 2014 +0100
@@ -50,38 +50,25 @@
 
     @Override
     public void runTestCases() throws Throwable {
-        String experimentalOptionError
-                = CommandLineOptionTest.getExperimentalOptionErrorMessage(
-                "UseRTMDeopt");
-        // verify that option is experimental
+        // verify that option could be turned on
+        CommandLineOptionTest.verifySameJVMStartup(
+                null, null, ExitCode.OK, "-XX:+UseRTMDeopt");
+        // verify that option could be turned off
         CommandLineOptionTest.verifySameJVMStartup(
-                new String[] { experimentalOptionError }, null, ExitCode.FAIL,
-                "-XX:+UseRTMDeopt");
-        // verify that option could be turned on
-        CommandLineOptionTest.verifySameJVMStartup(null, null, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMDeopt");
-        // verify that option could be turned off
-        CommandLineOptionTest.verifySameJVMStartup(null, null, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:-UseRTMDeopt");
+                null, null, ExitCode.OK, "-XX:-UseRTMDeopt");
+        // verify default value
+        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
+                TestUseRTMDeoptOptionOnSupportedConfig.DEFAULT_VALUE);
         // verify default value
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
                 TestUseRTMDeoptOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
-        // verify default value
-        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
-                TestUseRTMDeoptOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking");
         // verify that option is off when UseRTMLocking is off
-        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt", "false",
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:-UseRTMLocking", "-XX:+UseRTMDeopt");
+        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
+                "false", "-XX:-UseRTMLocking", "-XX:+UseRTMDeopt");
         // verify that option could be turned on
-        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt", "true",
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking", "-XX:+UseRTMDeopt");
+        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
+                "true", "-XX:+UseRTMLocking", "-XX:+UseRTMDeopt");
     }
 
     public static void main(String args[]) throws Throwable {
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/rtm/cli/TestUseRTMDeoptOptionOnUnsupportedConfig.java
--- a/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnUnsupportedConfig.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnUnsupportedConfig.java	Tue Aug 19 20:41:28 2014 +0100
@@ -48,7 +48,7 @@
     private TestUseRTMDeoptOptionOnUnsupportedConfig() {
         super(new NotPredicate(new AndPredicate(new SupportedCPU(),
                         new SupportedVM())),
-                "UseRTMDeopt", true, true,
+                "UseRTMDeopt", true, false,
                 TestUseRTMDeoptOptionOnUnsupportedConfig.DEFAULT_VALUE, "true");
     }
 
@@ -57,14 +57,11 @@
         super.verifyJVMStartup();
         // verify default value
         CommandLineOptionTest.verifyOptionValueForSameVM(optionName,
-                defaultValue,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                defaultValue);
         // verify that until RTMLocking is not used, value
         // will be set to default false.
         CommandLineOptionTest.verifyOptionValueForSameVM(optionName,
-                defaultValue,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMDeopt");
+                defaultValue, "-XX:+UseRTMDeopt");
     }
 
     public static void main(String args[]) throws Throwable {
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/rtm/cli/TestUseRTMLockingOptionOnSupportedConfig.java
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionOnSupportedConfig.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionOnSupportedConfig.java	Tue Aug 19 20:41:28 2014 +0100
@@ -51,43 +51,31 @@
     @Override
     public void runTestCases() throws Throwable {
         String unrecongnizedOption
-                = CommandLineOptionTest.getUnrecognizedOptionErrorMessage(
-                "UseRTMLocking");
-        String experimentalOptionError
-                = CommandLineOptionTest.getExperimentalOptionErrorMessage(
+                =  CommandLineOptionTest.getUnrecognizedOptionErrorMessage(
                 "UseRTMLocking");
-        // verify that options is experimental
-        CommandLineOptionTest.verifySameJVMStartup(
-                new String[] { experimentalOptionError }, null, ExitCode.FAIL,
-                "-XX:+UseRTMLocking");
         // verify that there are no warning or error in VM output
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[]{
                         RTMGenericCommandLineOptionTest.RTM_INSTR_ERROR,
                         unrecongnizedOption
-                }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking");
+                }, ExitCode.OK, "-XX:+UseRTMLocking"
+        );
 
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[]{
                         RTMGenericCommandLineOptionTest.RTM_INSTR_ERROR,
                         unrecongnizedOption
-                }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:-UseRTMLocking");
+                }, ExitCode.OK, "-XX:-UseRTMLocking"
+        );
         // verify that UseRTMLocking is of by default
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                TestUseRTMLockingOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                TestUseRTMLockingOptionOnSupportedConfig.DEFAULT_VALUE);
         // verify that we can change UseRTMLocking value
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
                 TestUseRTMLockingOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:-UseRTMLocking");
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                "true", CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking");
+                "true", "-XX:+UseRTMLocking");
     }
 
     public static void main(String args[]) throws Throwable {
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedCPU.java
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedCPU.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedCPU.java	Tue Aug 19 20:41:28 2014 +0100
@@ -63,9 +63,7 @@
             CommandLineOptionTest.verifySameJVMStartup(
                     new String[] { errorMessage },
                     new String[] { unrecongnizedOption },
-                    ExitCode.FAIL,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:+UseRTMLocking");
+                    ExitCode.FAIL, "-XX:+UseRTMLocking");
             // verify that we can pass -UseRTMLocking without
             // getting any error messages
             CommandLineOptionTest.verifySameJVMStartup(
@@ -73,27 +71,20 @@
                     new String[]{
                             errorMessage,
                             unrecongnizedOption
-                    }, ExitCode.OK,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:-UseRTMLocking");
+                    }, ExitCode.OK, "-XX:-UseRTMLocking");
 
             // verify that UseRTMLocking is false by default
             CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                    TestUseRTMLockingOptionOnUnsupportedCPU.DEFAULT_VALUE,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                    TestUseRTMLockingOptionOnUnsupportedCPU.DEFAULT_VALUE);
         } else {
             // verify that on non-x86 CPUs RTMLocking could not be used
             CommandLineOptionTest.verifySameJVMStartup(
                     new String[] { unrecongnizedOption },
-                    null, ExitCode.FAIL,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:+UseRTMLocking");
+                    null, ExitCode.FAIL, "-XX:+UseRTMLocking");
 
             CommandLineOptionTest.verifySameJVMStartup(
                     new String[] { unrecongnizedOption },
-                    null, ExitCode.FAIL,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:-UseRTMLocking");
+                    null, ExitCode.FAIL, "-XX:-UseRTMLocking");
         }
     }
 
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedVM.java
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedVM.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedVM.java	Tue Aug 19 20:41:28 2014 +0100
@@ -53,27 +53,17 @@
     public void runTestCases() throws Throwable {
         String errorMessage
                 = RTMGenericCommandLineOptionTest.RTM_UNSUPPORTED_VM_ERROR;
-        String experimentalOptionError
-                = CommandLineOptionTest.getExperimentalOptionErrorMessage(
-                "UseRTMLocking");
-        // verify that options is experimental
-        CommandLineOptionTest.verifySameJVMStartup(
-                new String[] { experimentalOptionError }, null, ExitCode.FAIL,
-                "-XX:+UseRTMLocking");
         // verify that we can't use +UseRTMLocking
         CommandLineOptionTest.verifySameJVMStartup(
                 new String[] { errorMessage }, null, ExitCode.FAIL,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking");
         // verify that we can turn it off
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[] { errorMessage }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:-UseRTMLocking");
         // verify that it is off by default
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                TestUseRTMLockingOptionOnUnsupportedVM.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                TestUseRTMLockingOptionOnUnsupportedVM.DEFAULT_VALUE);
     }
 
     public static void main(String args[]) throws Throwable {
diff -r f06c7b654d63 -r 03c5d509a811 test/compiler/rtm/cli/TestUseRTMLockingOptionWithBiasedLocking.java
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionWithBiasedLocking.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionWithBiasedLocking.java	Tue Aug 19 20:41:28 2014 +0100
@@ -53,22 +53,18 @@
         // verify that we will not get a warning
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[] { warningMessage }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking", "-XX:-UseBiasedLocking");
         // verify that we will get a warning
         CommandLineOptionTest.verifySameJVMStartup(
                 new String[] { warningMessage }, null, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking", "-XX:+UseBiasedLocking");
         // verify that UseBiasedLocking is false when we use rtm locking
         CommandLineOptionTest.verifyOptionValueForSameVM("UseBiasedLocking",
-                "false", CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking");
+                "false", "-XX:+UseRTMLocking");
         // verify that we can't turn on biased locking when
         // using rtm locking
         CommandLineOptionTest.verifyOptionValueForSameVM("UseBiasedLocking",
-                "false", CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking", "-XX:+UseBiasedLocking");
+                "false", "-XX:+UseRTMLocking", "-XX:+UseBiasedLocking");
     }
 
     public static void main(String args[]) throws Throwable {
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/arguments/TestDynMaxHeapFreeRatio.java
--- a/test/gc/arguments/TestDynMaxHeapFreeRatio.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/gc/arguments/TestDynMaxHeapFreeRatio.java	Tue Aug 19 20:41:28 2014 +0100
@@ -21,6 +21,11 @@
  * questions.
  */
 
+import static com.oracle.java.testlibrary.Asserts.assertEQ;
+import static com.oracle.java.testlibrary.Asserts.assertFalse;
+import static com.oracle.java.testlibrary.Asserts.assertTrue;
+import com.oracle.java.testlibrary.DynamicVMOption;
+
 /**
  * @test TestDynMaxHeapFreeRatio
  * @bug 8028391
@@ -33,32 +38,45 @@
  * @run main/othervm -XX:MinHeapFreeRatio=51 -XX:MaxHeapFreeRatio=52 TestDynMaxHeapFreeRatio
  * @run main/othervm -XX:MinHeapFreeRatio=75 -XX:MaxHeapFreeRatio=100 TestDynMaxHeapFreeRatio
  */
-import com.oracle.java.testlibrary.TestDynamicVMOption;
-import com.oracle.java.testlibrary.DynamicVMOptionChecker;
-
-public class TestDynMaxHeapFreeRatio extends TestDynamicVMOption {
-
-    public static final String MinFreeRatioFlagName = "MinHeapFreeRatio";
-    public static final String MaxFreeRatioFlagName = "MaxHeapFreeRatio";
-
-    public TestDynMaxHeapFreeRatio() {
-        super(MaxFreeRatioFlagName);
-    }
-
-    public void test() {
-
-        int minHeapFreeValue = DynamicVMOptionChecker.getIntValue(MinFreeRatioFlagName);
-        System.out.println(MinFreeRatioFlagName + " = " + minHeapFreeValue);
-
-        testPercentageValues();
-
-        checkInvalidValue(Integer.toString(minHeapFreeValue - 1));
-        checkValidValue(Integer.toString(minHeapFreeValue));
-        checkValidValue("100");
-    }
+public class TestDynMaxHeapFreeRatio {
 
     public static void main(String args[]) throws Exception {
-        new TestDynMaxHeapFreeRatio().test();
+
+        // low boundary value
+        int minValue = DynamicVMOption.getInt("MinHeapFreeRatio");
+        System.out.println("MinHeapFreeRatio= " + minValue);
+
+        String badValues[] = {
+            null,
+            "",
+            "not a number",
+            "8.5", "-0.01",
+            Integer.toString(Integer.MIN_VALUE),
+            Integer.toString(Integer.MAX_VALUE),
+            Integer.toString(minValue - 1),
+            "-1024", "-1", "101", "1997"
+        };
+
+        String goodValues[] = {
+            Integer.toString(minValue),
+            Integer.toString(minValue + 1),
+            Integer.toString((minValue + 100) / 2),
+            "99", "100"
+        };
+
+        DynamicVMOption option = new DynamicVMOption("MaxHeapFreeRatio");
+
+        assertTrue(option.isWriteable(), "Option " + option.name
+                + " is expected to be writable");
+
+        for (String v : badValues) {
+            assertFalse(option.isValidValue(v),
+                    "'" + v + "' is expected to be illegal for flag " + option.name);
+        }
+        for (String v : goodValues) {
+            option.setValue(v);
+            String newValue = option.getValue();
+            assertEQ(v, newValue);
+        }
     }
-
 }
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/arguments/TestDynMinHeapFreeRatio.java
--- a/test/gc/arguments/TestDynMinHeapFreeRatio.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/gc/arguments/TestDynMinHeapFreeRatio.java	Tue Aug 19 20:41:28 2014 +0100
@@ -33,30 +33,52 @@
  * @run main/othervm -XX:MinHeapFreeRatio=51 -XX:MaxHeapFreeRatio=52 TestDynMinHeapFreeRatio
  * @run main/othervm -XX:MinHeapFreeRatio=75 -XX:MaxHeapFreeRatio=100 TestDynMinHeapFreeRatio
  */
-import com.oracle.java.testlibrary.TestDynamicVMOption;
-import com.oracle.java.testlibrary.DynamicVMOptionChecker;
-
-public class TestDynMinHeapFreeRatio extends TestDynamicVMOption {
-
-    public static final String MinFreeRatioFlagName = "MinHeapFreeRatio";
-    public static final String MaxFreeRatioFlagName = "MaxHeapFreeRatio";
+import static com.oracle.java.testlibrary.Asserts.assertEQ;
+import static com.oracle.java.testlibrary.Asserts.assertFalse;
+import static com.oracle.java.testlibrary.Asserts.assertTrue;
+import com.oracle.java.testlibrary.DynamicVMOption;
 
-    public TestDynMinHeapFreeRatio() {
-        super(MinFreeRatioFlagName);
-    }
-
-    public void test() {
-        int maxHeapFreeValue = DynamicVMOptionChecker.getIntValue(MaxFreeRatioFlagName);
-        System.out.println(MaxFreeRatioFlagName + " = " + maxHeapFreeValue);
-
-        testPercentageValues();
-
-        checkInvalidValue(Integer.toString(maxHeapFreeValue + 1));
-        checkValidValue(Integer.toString(maxHeapFreeValue));
-        checkValidValue("0");
-    }
+public class TestDynMinHeapFreeRatio {
 
     public static void main(String args[]) throws Exception {
-        new TestDynMinHeapFreeRatio().test();
+
+        // high boundary value
+        int maxValue = DynamicVMOption.getInt("MaxHeapFreeRatio");
+        System.out.println("MaxHeapFreeRatio= " + maxValue);
+
+        String badValues[] = {
+            null,
+            "",
+            "not a number",
+            "8.5", "-0.01",
+            Integer.toString(Integer.MIN_VALUE),
+            Integer.toString(Integer.MAX_VALUE),
+            Integer.toString(maxValue + 1),
+            "-1024", "-1", "101", "1997"
+        };
+
+        String goodValues[] = {
+            Integer.toString(maxValue),
+            Integer.toString(maxValue - 1),
+            Integer.toString(maxValue / 2),
+            "0", "1"
+        };
+
+        // option under test
+        DynamicVMOption option = new DynamicVMOption("MinHeapFreeRatio");
+
+        assertTrue(option.isWriteable(), "Option " + option.name
+                + " is expected to be writable");
+
+        for (String v : badValues) {
+            assertFalse(option.isValidValue(v),
+                    "'" + v + "' is expected to be illegal for flag " + option.name);
+        }
+
+        for (String v : goodValues) {
+            option.setValue(v);
+            String newValue = option.getValue();
+            assertEQ(v, newValue);
+        }
     }
 }
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/class_unloading/AllocateBeyondMetaspaceSize.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/class_unloading/AllocateBeyondMetaspaceSize.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import sun.hotspot.WhiteBox;
+
+class AllocateBeyondMetaspaceSize {
+  public static Object dummy;
+
+  public static void main(String [] args) {
+    if (args.length != 2) {
+      throw new IllegalArgumentException("Usage: <MetaspaceSize> <YoungGenSize>");
+    }
+
+    long metaspaceSize = Long.parseLong(args[0]);
+    long youngGenSize = Long.parseLong(args[1]);
+
+    run(metaspaceSize, youngGenSize);
+  }
+
+  private static void run(long metaspaceSize, long youngGenSize) {
+    WhiteBox wb = WhiteBox.getWhiteBox();
+    // Allocate twice the given MetaspaceSize so more than MetaspaceSize of metadata is in use.
+    long allocationBeyondMetaspaceSize  = metaspaceSize * 2;
+    long metaspace = wb.allocateMetaspace(null, allocationBeyondMetaspaceSize);
+
+    triggerYoungGC(youngGenSize);
+    // Free with the size that was allocated; the last argument is a size, not the address.
+    wb.freeMetaspace(null, metaspace, allocationBeyondMetaspaceSize);
+  }
+
+  private static void triggerYoungGC(long youngGenSize) {
+    long approxAllocSize = 32 * 1024;
+    long numAllocations  = 2 * youngGenSize / approxAllocSize;
+
+    for (long i = 0; i < numAllocations; i++) {
+      dummy = new byte[(int)approxAllocSize];
+    }
+  }
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/class_unloading/TestCMSClassUnloadingEnabledHWM.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/class_unloading/TestCMSClassUnloadingEnabledHWM.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @key gc
+ * @bug 8049831
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestCMSClassUnloadingEnabledHWM AllocateBeyondMetaspaceSize
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run driver TestCMSClassUnloadingEnabledHWM
+ * @summary Test that -XX:-CMSClassUnloadingEnabled will trigger a Full GC when more than MetaspaceSize metadata is allocated.
+ */
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+public class TestCMSClassUnloadingEnabledHWM {
+  private static long MetaspaceSize = 32 * 1024 * 1024;
+  private static long YoungGenSize  = 32 * 1024 * 1024;
+
+  private static OutputAnalyzer run(boolean enableUnloading) throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+      "-Xbootclasspath/a:.",
+      "-XX:+WhiteBoxAPI",
+      "-XX:MetaspaceSize=" + MetaspaceSize,
+      "-Xmn" + YoungGenSize,
+      "-XX:+UseConcMarkSweepGC",
+      "-XX:" + (enableUnloading ? "+" : "-") + "CMSClassUnloadingEnabled",
+      "-XX:+PrintHeapAtGC",
+      "-XX:+PrintGCDetails",
+      "AllocateBeyondMetaspaceSize",
+      "" + MetaspaceSize,
+      "" + YoungGenSize);
+    return new OutputAnalyzer(pb.start());
+  }
+
+  public static OutputAnalyzer runWithCMSClassUnloading() throws Exception {
+    return run(true);
+  }
+
+  public static OutputAnalyzer runWithoutCMSClassUnloading() throws Exception {
+    return run(false);
+  }
+
+  public static void testWithoutCMSClassUnloading() throws Exception {
+    // -XX:-CMSClassUnloadingEnabled is used, so we expect a full GC instead of a concurrent cycle.
+    OutputAnalyzer out = runWithoutCMSClassUnloading();
+
+    out.shouldMatch(".*Full GC.*");
+    out.shouldNotMatch(".*CMS Initial Mark.*");
+  }
+
+  public static void testWithCMSClassUnloading() throws Exception {
+    // -XX:+CMSClassUnloadingEnabled is used, so we expect a concurrent cycle instead of a full GC.
+    OutputAnalyzer out = runWithCMSClassUnloading();
+
+    out.shouldMatch(".*CMS Initial Mark.*");
+    out.shouldNotMatch(".*Full GC.*");
+  }
+
+  public static void main(String args[]) throws Exception {
+    testWithCMSClassUnloading();
+    testWithoutCMSClassUnloading();
+  }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/class_unloading/TestG1ClassUnloadingHWM.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/class_unloading/TestG1ClassUnloadingHWM.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @key gc
+ * @bug 8049831
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestG1ClassUnloadingHWM AllocateBeyondMetaspaceSize
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run driver TestG1ClassUnloadingHWM
+ * @summary Test that -XX:-ClassUnloadingWithConcurrentMark will trigger a Full GC when more than MetaspaceSize metadata is allocated.
+ */
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+public class TestG1ClassUnloadingHWM {
+  private static long MetaspaceSize = 32 * 1024 * 1024;
+  private static long YoungGenSize  = 32 * 1024 * 1024;
+
+  private static OutputAnalyzer run(boolean enableUnloading) throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+      "-Xbootclasspath/a:.",
+      "-XX:+WhiteBoxAPI",
+      "-XX:MetaspaceSize=" + MetaspaceSize,
+      "-Xmn" + YoungGenSize,
+      "-XX:+UseG1GC",
+      "-XX:" + (enableUnloading ? "+" : "-") + "ClassUnloadingWithConcurrentMark",
+      "-XX:+PrintHeapAtGC",
+      "-XX:+PrintGCDetails",
+      "AllocateBeyondMetaspaceSize",
+      "" + MetaspaceSize,
+      "" + YoungGenSize);
+    return new OutputAnalyzer(pb.start());
+  }
+
+  public static OutputAnalyzer runWithG1ClassUnloading() throws Exception {
+    return run(true);
+  }
+
+  public static OutputAnalyzer runWithoutG1ClassUnloading() throws Exception {
+    return run(false);
+  }
+
+  public static void testWithoutG1ClassUnloading() throws Exception {
+    // -XX:-ClassUnloadingWithConcurrentMark is used, so we expect a full GC instead of a concurrent cycle.
+    OutputAnalyzer out = runWithoutG1ClassUnloading();
+
+    out.shouldMatch(".*Full GC.*");
+    out.shouldNotMatch(".*initial-mark.*");
+  }
+
+  public static void testWithG1ClassUnloading() throws Exception {
+    // -XX:+ClassUnloadingWithConcurrentMark is used, so we expect a concurrent cycle instead of a full GC.
+    OutputAnalyzer out = runWithG1ClassUnloading();
+
+    out.shouldMatch(".*initial-mark.*");
+    out.shouldNotMatch(".*Full GC.*");
+  }
+
+  public static void main(String args[]) throws Exception {
+    testWithG1ClassUnloading();
+    testWithoutG1ClassUnloading();
+  }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/g1/TestDeferredRSUpdate.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestDeferredRSUpdate.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestDeferredRSUpdate
+ * @bug 8040977 8052170
+ * @summary Ensure that running with -XX:-G1DeferredRSUpdate does not crash the VM
+ * @key gc
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+
+public class TestDeferredRSUpdate {
+  public static void main(String[] args) throws Exception {
+    GCTest.main(args);
+
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                                              "-Xmx10M",
+                                                              "-XX:+PrintGCDetails",
+                                                              // G1DeferredRSUpdate is a develop option, but we cannot limit execution of this test to only debug VMs.
+                                                              "-XX:+IgnoreUnrecognizedVMOptions",
+                                                              "-XX:-G1DeferredRSUpdate",
+                                                              GCTest.class.getName());
+
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+    output.shouldHaveExitValue(0);
+  }
+
+  static class GCTest {
+    private static Object[] garbage = new Object[32];
+
+    public static void main(String [] args) {
+      System.out.println("Creating garbage");
+      // Allocate 1024 arrays of 32K references each (about 128MB with 4-byte references).
+      // This should cause at least one minor GC that copies some objects to old gen; the
+      // use-before-initialization crash then occurs as old-to-young references are installed.
+      Object prev = null;
+      Object prevPrev = null;
+      for (int i = 0; i < 1024; i++) {
+        Object[] next = new Object[32 * 1024];
+        next[0] = prev;
+        next[1] = prevPrev;
+
+        Object[] cur = (Object[]) garbage[i % garbage.length];
+        if (cur != null) {
+          cur[0] = null;
+          cur[1] = null;
+        }
+        garbage[i % garbage.length] = next;
+
+        prevPrev = prev;
+        prev = next;
+      }
+      System.out.println("Done");
+    }
+  }
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/g1/TestEagerReclaimHumongousRegions.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestEagerReclaimHumongousRegions.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestEagerReclaimHumongousRegions
+ * @bug 8027959
+ * @summary Test to make sure that eager reclaim of humongous objects works. We simply try to fill
+ * up the heap with humongous objects that should be eagerly reclaimable to avoid Full GC.
+ * @key gc
+ * @library /testlibrary
+ */
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+import java.util.LinkedList;
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.Asserts;
+
+class ReclaimRegionFast {
+    public static final int M = 1024*1024;
+
+    public static LinkedList<Object> garbageList = new LinkedList<Object>();
+
+    public static void genGarbage() {
+        for (int i = 0; i < 32*1024; i++) {
+            garbageList.add(new int[100]);
+        }
+        garbageList.clear();
+    }
+
+    // A large object referenced by a static.
+    static int[] filler = new int[10 * M];
+
+    public static void main(String[] args) {
+
+        int[] large = new int[M];
+
+        Object ref_from_stack = large;
+
+        for (int i = 0; i < 100; i++) {
+            // A large object that will be reclaimed eagerly.
+            large = new int[6*M];
+            genGarbage();
+            // Make sure that the compiler cannot completely remove
+            // the allocation of the large object until here.
+            System.out.println(large);
+        }
+
+        // Keep the reference to the first object alive.
+        System.out.println(ref_from_stack);
+    }
+}
+
+public class TestEagerReclaimHumongousRegions {
+    public static void main(String[] args) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            "-XX:+UseG1GC",
+            "-Xms128M",
+            "-Xmx128M",
+            "-Xmn16M",
+            "-XX:+PrintGC",
+            ReclaimRegionFast.class.getName());
+
+        Pattern p = Pattern.compile("Full GC");
+
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+        int found = 0;
+        Matcher m = p.matcher(output.getStdout());
+        while (m.find()) { found++; }
+        System.out.println("Issued " + found + " Full GCs");
+        Asserts.assertLT(found, 10, "Found that " + found + " Full GCs were issued. This is larger than the bound. Eager reclaim seems to not work at all");
+
+        output.shouldHaveExitValue(0);
+    }
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/g1/TestEagerReclaimHumongousRegions2.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestEagerReclaimHumongousRegions2.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestEagerReclaimHumongousRegions2
+ * @bug 8051973
+ * @summary Test to make sure that eager reclaim of humongous objects correctly clears
+ * mark bitmaps at reclaim.
+ * @key gc
+ * @library /testlibrary
+ */
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.Random;
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+// An object that has a few references to other instances to slow down marking.
+class ObjectWithSomeRefs {
+    public ObjectWithSomeRefs other1;
+    public ObjectWithSomeRefs other2;
+    public ObjectWithSomeRefs other3;
+    public ObjectWithSomeRefs other4;
+}
+
+class ReclaimRegionFast2 { // distinct from ReclaimRegionFast in TestEagerReclaimHumongousRegions.java
+    public static final int M = 1024*1024;
+
+    public static LinkedList<Object> garbageList = new LinkedList<Object>();
+
+    public static void genGarbage(Object large) {
+        for (int i = 0; i < 64*1024; i++) {
+            Object[] garbage = new Object[50];
+            garbage[0] = large;
+            garbageList.add(garbage);
+        }
+        garbageList.clear();
+    }
+
+    public static ArrayList<ObjectWithSomeRefs> longList = new ArrayList<ObjectWithSomeRefs>();
+
+    public static void main(String[] args) {
+
+        for (int i = 0; i < 16*1024; i++) {
+             longList.add(new ObjectWithSomeRefs());
+        }
+
+        Random rnd = new Random();
+        for (int i = 0; i < longList.size(); i++) {
+             int len = longList.size();
+             longList.get(i).other1 = longList.get(rnd.nextInt(len));
+             longList.get(i).other2 = longList.get(rnd.nextInt(len));
+             longList.get(i).other3 = longList.get(rnd.nextInt(len));
+             longList.get(i).other4 = longList.get(rnd.nextInt(len));
+        }
+
+        int[] large1 = new int[M];
+        int[] large2 = null;
+        int[] large3 = null;
+        int[] large4 = null;
+
+        Object ref_from_stack = large1;
+
+        for (int i = 0; i < 20; i++) {
+            // A set of large objects that will be reclaimed eagerly - and hopefully marked.
+            large1 = new int[M - 20];
+            large2 = new int[M - 20];
+            large3 = new int[M - 20];
+            large4 = new int[M - 20];
+            genGarbage(large1);
+            // Make sure that the compiler cannot completely remove
+            // the allocation of the large object until here.
+            System.out.println(large1 + " " + large2 + " " + large3 + " " + large4);
+        }
+
+        // Keep the reference to the first object alive.
+        System.out.println(ref_from_stack);
+    }
+}
+
+public class TestEagerReclaimHumongousRegions2 {
+    public static void main(String[] args) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            "-XX:+UseG1GC",
+            "-Xms128M",
+            "-Xmx128M",
+            "-Xmn2M",
+            "-XX:G1HeapRegionSize=1M",
+            "-XX:InitiatingHeapOccupancyPercent=0", // Want to have as many initial marks as possible.
+            "-XX:+PrintGC",
+            "-XX:+VerifyAfterGC",
+            "-XX:ConcGCThreads=1", // Want to make marking as slow as possible.
+            "-XX:+IgnoreUnrecognizedVMOptions", // G1VerifyBitmaps is develop only.
+            "-XX:+G1VerifyBitmaps",
+            ReclaimRegionFast2.class.getName());
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        output.shouldHaveExitValue(0);
+    }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/g1/TestGCLogMessages.java
--- a/test/gc/g1/TestGCLogMessages.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/gc/g1/TestGCLogMessages.java	Tue Aug 19 20:41:28 2014 +0100
@@ -22,8 +22,8 @@
  */
 
 /*
- * @test TestPrintGCDetails
- * @bug 8035406 8027295 8035398
+ * @test TestGCLogMessages
+ * @bug 8035406 8027295 8035398 8019342 8027959
  * @summary Ensure that the PrintGCDetails output for a minor GC with G1
  * includes the expected necessary messages.
  * @key gc
@@ -48,10 +48,13 @@
     OutputAnalyzer output = new OutputAnalyzer(pb.start());
 
     output.shouldNotContain("[Redirty Cards");
+    output.shouldNotContain("[Parallel Redirty");
+    output.shouldNotContain("[Redirtied Cards");
     output.shouldNotContain("[Code Root Purge");
     output.shouldNotContain("[String Dedup Fixup");
     output.shouldNotContain("[Young Free CSet");
     output.shouldNotContain("[Non-Young Free CSet");
+    output.shouldNotContain("[Humongous Reclaim");
     output.shouldHaveExitValue(0);
 
     pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
@@ -63,10 +66,16 @@
     output = new OutputAnalyzer(pb.start());
 
     output.shouldContain("[Redirty Cards");
+    output.shouldNotContain("[Parallel Redirty");
+    output.shouldNotContain("[Redirtied Cards");
     output.shouldContain("[Code Root Purge");
     output.shouldContain("[String Dedup Fixup");
     output.shouldNotContain("[Young Free CSet");
     output.shouldNotContain("[Non-Young Free CSet");
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldNotContain("[Humongous Total");
+    output.shouldNotContain("[Humongous Candidate");
+    output.shouldNotContain("[Humongous Reclaimed");
     output.shouldHaveExitValue(0);
 
     pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
@@ -80,16 +89,16 @@
     output = new OutputAnalyzer(pb.start());
 
     output.shouldContain("[Redirty Cards");
+    output.shouldContain("[Parallel Redirty");
+    output.shouldContain("[Redirtied Cards");
     output.shouldContain("[Code Root Purge");
     output.shouldContain("[String Dedup Fixup");
     output.shouldContain("[Young Free CSet");
     output.shouldContain("[Non-Young Free CSet");
-
-    // also check evacuation failure messages once
-    output.shouldNotContain("[Evacuation Failure");
-    output.shouldNotContain("[Recalculate Used");
-    output.shouldNotContain("[Remove Self Forwards");
-    output.shouldNotContain("[Restore RemSet");
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldContain("[Humongous Total");
+    output.shouldContain("[Humongous Candidate");
+    output.shouldContain("[Humongous Reclaimed");
     output.shouldHaveExitValue(0);
   }
 
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/g1/TestSummarizeRSetStatsTools.java
--- a/test/gc/g1/TestSummarizeRSetStatsTools.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/gc/g1/TestSummarizeRSetStatsTools.java	Tue Aug 19 20:41:28 2014 +0100
@@ -88,7 +88,6 @@
         ArrayList<String> finalargs = new ArrayList<String>();
         String[] defaultArgs = new String[] {
             "-XX:+UseG1GC",
-            "-XX:+UseCompressedOops",
             "-Xmn4m",
             "-Xmx20m",
             "-XX:InitiatingHeapOccupancyPercent=100", // we don't want the additional GCs due to initial marking
diff -r f06c7b654d63 -r 03c5d509a811 test/gc/logging/TestGCId.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/logging/TestGCId.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestGCId
+ * @bug 8043607
+ * @summary Ensure that the GCId is logged
+ * @key gc
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+
+public class TestGCId {
+  public static void main(String[] args) throws Exception {
+    testGCId("UseParallelGC", "PrintGC");
+    testGCId("UseParallelGC", "PrintGCDetails");
+
+    testGCId("UseG1GC", "PrintGC");
+    testGCId("UseG1GC", "PrintGCDetails");
+
+    testGCId("UseConcMarkSweepGC", "PrintGC");
+    testGCId("UseConcMarkSweepGC", "PrintGCDetails");
+
+    testGCId("UseSerialGC", "PrintGC");
+    testGCId("UseSerialGC", "PrintGCDetails");
+  }
+
+  private static void verifyContainsGCIDs(OutputAnalyzer output) {
+    output.shouldMatch("^#0: \\[");
+    output.shouldMatch("^#1: \\[");
+    output.shouldHaveExitValue(0);
+  }
+
+  private static void verifyContainsNoGCIDs(OutputAnalyzer output) {
+    output.shouldNotMatch("^#[0-9]+: \\[");
+    output.shouldHaveExitValue(0);
+  }
+
+  private static void testGCId(String gcFlag, String logFlag) throws Exception {
+    // GCID logging enabled
+    ProcessBuilder pb_enabled =
+      ProcessTools.createJavaProcessBuilder("-XX:+" + gcFlag, "-XX:+" + logFlag, "-Xmx10M", "-XX:+PrintGCID", GCTest.class.getName());
+    verifyContainsGCIDs(new OutputAnalyzer(pb_enabled.start()));
+
+    // GCID logging disabled
+    ProcessBuilder pb_disabled =
+      ProcessTools.createJavaProcessBuilder("-XX:+" + gcFlag, "-XX:+" + logFlag, "-Xmx10M", "-XX:-PrintGCID", GCTest.class.getName());
+    verifyContainsNoGCIDs(new OutputAnalyzer(pb_disabled.start()));
+
+    // GCID logging default
+    ProcessBuilder pb_default =
+      ProcessTools.createJavaProcessBuilder("-XX:+" + gcFlag, "-XX:+" + logFlag, "-Xmx10M", GCTest.class.getName());
+    verifyContainsNoGCIDs(new OutputAnalyzer(pb_default.start()));
+  }
+
+  static class GCTest {
+    private static byte[] garbage;
+    public static void main(String [] args) {
+      System.out.println("Creating garbage");
+      // create 128MB of garbage. This should result in at least one GC
+      for (int i = 0; i < 1024; i++) {
+        garbage = new byte[128 * 1024];
+      }
+      // do a system gc to get one more gc
+      System.gc();
+      System.out.println("Done");
+    }
+  }
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/runtime/7116786/Test7116786.java
--- a/test/runtime/7116786/Test7116786.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/runtime/7116786/Test7116786.java	Tue Aug 19 20:41:28 2014 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -147,7 +147,8 @@
                  "no stackmap frame at jump location or bad jump",
                  "Inconsistent stackmap frames at branch target "),
 
-        new Case("case15", "stackMapTable.cpp", true, "check_new_object",
+        /* Backward jump with uninit is allowed starting with JDK 8 */
+        new Case("case15", "stackMapTable.cpp", false, "check_new_object",
                  "backward jump with uninit",
                  "Uninitialized object exists on backward branch "),
 
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary/com/oracle/java/testlibrary/DynamicVMOption.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary/com/oracle/java/testlibrary/DynamicVMOption.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.java.testlibrary;
+
+import com.sun.management.HotSpotDiagnosticMXBean;
+import java.lang.management.ManagementFactory;
+
+/**
+ * A utility class to work with VM options which could be altered during
+ * execution.
+ *
+ * This class is a wrapper around {@code com.sun.management.VMOption}.
+ * It provides a more convenient interface to read/write the values.
+ *
+ */
+public class DynamicVMOption {
+
+    private final HotSpotDiagnosticMXBean mxBean;
+
+    /**
+     * VM option name, like "MinHeapFreeRatio".
+     */
+    public final String name;
+
+    /**
+     * Creates an instance of DynamicVMOption.
+     *
+     * @param name the VM option name
+     */
+    public DynamicVMOption(String name) {
+        this.name = name;
+        mxBean = ManagementFactory.getPlatformMXBean(HotSpotDiagnosticMXBean.class);
+    }
+
+    /**
+     * Sets a new value for the option.
+     * Trying to set a value that is not applicable will cause an IllegalArgumentException.
+     * Behavior with null is undefined; most likely an NPE will be thrown.
+     *
+     * @param newValue the value to be set
+     * @see #getValue()
+     * @throws IllegalArgumentException if newValue is not applicable to the option
+     */
+    public final void setValue(String newValue) {
+        mxBean.setVMOption(name, newValue);
+    }
+
+    /**
+     * Returns the value of the option.
+     *
+     * @return the current option value
+     * @see #setValue(java.lang.String)
+     */
+    public final String getValue() {
+        return mxBean.getVMOption(name).getValue();
+    }
+
+    /**
+     * Returns true if the option is writable, false otherwise.
+     *
+     * @return true if the option is writable, false otherwise
+     */
+    public final boolean isWriteable() {
+        return mxBean.getVMOption(name).isWriteable();
+    }
+
+    /**
+     * Checks if the given value is applicable for the option.
+     *
+     * This method tries to set the option to the new value. If no exception
+     * has been thrown the value is treated as valid.
+     *
+     * Calling this method will not change the option value. After an attempt
+     * to set a new value, the option will be restored to its previous value.
+     *
+     * @param value the value to verify
+     * @return true if the option could be set to the given value
+     */
+    public boolean isValidValue(String value) {
+        String oldValue = getValue();
+        try {
+            setValue(value);
+            return true; // accepted without an exception
+        } catch (NullPointerException e) {
+            if (value != null) {
+                throw e; // not caused by a null value: do not report it as valid
+            }
+            return false;
+        } catch (IllegalArgumentException e) {
+            return false; // rejected as not applicable to this option
+        } finally {
+            setValue(oldValue); // always restore the previous value
+        }
+    }
+
+    /**
+     * Returns the value of the given VM option as String.
+     *
+     * This is a simple shortcut for {@code new DynamicVMOption(name).getValue()}
+     *
+     * @param name the name of VM option
+     * @return value as a string
+     * @see #getValue()
+     */
+    public static String getString(String name) {
+        return new DynamicVMOption(name).getValue();
+    }
+
+    /**
+     * Returns the value of the given option as int.
+     *
+     * @param name the name of VM option
+     * @return value parsed as integer
+     * @see #getString(java.lang.String)
+     *
+     */
+    public static int getInt(String name) {
+        return Integer.parseInt(getString(name));
+    }
+
+    /**
+     * Sets the VM option to a new value.
+     *
+     * This is a simple shortcut for {@code new DynamicVMOption(name).setValue(value)}
+     *
+     * @param name the name of VM option
+     * @param value the value to be set
+     * @see #setValue(java.lang.String)
+     */
+    public static void setString(String name, String value) {
+        new DynamicVMOption(name).setValue(value);
+    }
+
+    /**
+     * Sets the VM option value to a new integer value.
+     *
+     * @param name the name of VM option
+     * @param value the integer value to be set
+     * @see #setString(java.lang.String, java.lang.String)
+     */
+    public static void setInt(String name, int value) {
+        new DynamicVMOption(name).setValue(Integer.toString(value));
+    }
+
+}
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary/com/oracle/java/testlibrary/DynamicVMOptionChecker.java
--- a/test/testlibrary/com/oracle/java/testlibrary/DynamicVMOptionChecker.java	Thu Jul 31 09:58:53 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.java.testlibrary;
-
-import com.sun.management.HotSpotDiagnosticMXBean;
-import com.sun.management.VMOption;
-import java.lang.management.ManagementFactory;
-
-/**
- * Simple class to check writeability, invalid and valid values for VMOption
- */
-public class DynamicVMOptionChecker {
-
-    /**
-     * Reads VM option from PlatformMXBean and parse it to integer value
-     *
-     * @param name of option
-     * @return parsed value
-     */
-    public static int getIntValue(String name) {
-
-        VMOption option = ManagementFactory.
-                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
-                getVMOption(name);
-
-        return Integer.parseInt(option.getValue());
-    }
-
-    /**
-     * Sets VM option value
-     *
-     * @param name of option
-     * @param value to set
-     */
-    public static void setIntValue(String name, int value) {
-        ManagementFactory.getPlatformMXBean(HotSpotDiagnosticMXBean.class).setVMOption(name, Integer.toString(value));
-    }
-
-    /**
-     * Checks that VM option is dynamically writable
-     *
-     * @param name
-     * @throws RuntimeException if option if not writable
-     * @return always true
-     */
-    public static boolean checkIsWritable(String name) {
-        VMOption option = ManagementFactory.
-                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
-                getVMOption(name);
-
-        if (!option.isWriteable()) {
-            throw new RuntimeException(name + " is not writable");
-        }
-
-        return true;
-    }
-
-    /**
-     * Checks that value cannot be set
-     *
-     * @param name of flag
-     * @param value string representation of value to set
-     * @throws RuntimeException on error - when expected exception hasn't been thrown
-     */
-    public static void checkInvalidValue(String name, String value) {
-        // should throw
-        try {
-            ManagementFactory.
-                    getPlatformMXBean(HotSpotDiagnosticMXBean.class).
-                    setVMOption(name, value);
-
-        } catch (IllegalArgumentException e) {
-            return;
-        }
-
-        throw new RuntimeException("Expected IllegalArgumentException was not thrown, " + name + "= " + value);
-    }
-
-    /**
-     * Checks that value can be set
-     *
-     * @param name of flag to set
-     * @param value string representation of value to set
-     * @throws RuntimeException on error - when value in VM is not equal to origin
-     */
-    public static void checkValidValue(String name, String value) {
-        ManagementFactory.
-                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
-                setVMOption(name, value);
-
-        VMOption option = ManagementFactory.
-                getPlatformMXBean(HotSpotDiagnosticMXBean.class).
-                getVMOption(name);
-
-        if (!option.getValue().equals(value)) {
-            throw new RuntimeException("Actual value of " + name + " \"" + option.getValue()
-                    + "\" not equal origin \"" + value + "\"");
-        }
-    }
-
-}
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary/com/oracle/java/testlibrary/TestDynamicVMOption.java
--- a/test/testlibrary/com/oracle/java/testlibrary/TestDynamicVMOption.java	Thu Jul 31 09:58:53 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.java.testlibrary;
-
-/**
- * Simple class to check writeability, invalid and valid values for concrete VMOption
- */
-public class TestDynamicVMOption {
-
-    private final String name;
-    private final int value;
-
-    /**
-     * Constructor
-     *
-     * @param name of VM option to test
-     */
-    public TestDynamicVMOption(String name) {
-        this.name = name;
-        this.value = DynamicVMOptionChecker.getIntValue(name);
-        System.out.println(this.name + " = " + this.value);
-    }
-
-    /**
-     * Checks that this value can accept valid percentage values and cannot accept invalid percentage values
-     *
-     * @throws RuntimeException
-     */
-    public void testPercentageValues() {
-        checkInvalidValue(Integer.toString(Integer.MIN_VALUE));
-        checkInvalidValue(Integer.toString(Integer.MAX_VALUE));
-        checkInvalidValue("-10");
-        checkInvalidValue("190");
-    }
-
-    /**
-     * Reads VM option from PlatformMXBean and parse it to integer value
-     *
-     * @return value
-     */
-    public int getIntValue() {
-        return DynamicVMOptionChecker.getIntValue(this.name);
-    }
-
-    /**
-     * Sets VM option value
-     *
-     * @param value to set
-     */
-    public void setIntValue(int value) {
-        DynamicVMOptionChecker.setIntValue(this.name, value);
-    }
-
-    /**
-     * Checks that this VM option is dynamically writable
-     *
-     * @throws RuntimeException if option if not writable
-     * @return true
-     */
-    public boolean checkIsWritable() throws RuntimeException {
-        return DynamicVMOptionChecker.checkIsWritable(this.name);
-    }
-
-    /**
-     * Checks that value for this VM option cannot be set
-     *
-     * @param value to check
-     * @throws RuntimeException on error - when expected exception hasn't been thrown
-     */
-    public void checkInvalidValue(String value) {
-        DynamicVMOptionChecker.checkInvalidValue(this.name, value);
-    }
-
-    /**
-     * Checks that value for this VM option can be set
-     *
-     * @param value to check
-     * @throws RuntimeException on error - when value in VM is not equal to origin
-     */
-    public void checkValidValue(String value) {
-        DynamicVMOptionChecker.checkValidValue(this.name, value);
-    }
-
-}
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary/whitebox/sun/hotspot/WhiteBox.java
--- a/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java	Thu Jul 31 09:58:53 2014 +0100
+++ b/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java	Tue Aug 19 20:41:28 2014 +0100
@@ -129,7 +129,7 @@
   }
   public native int     getCompileQueueSize(int compLevel);
   public native boolean testSetForceInlineMethod(Executable method, boolean value);
-  public boolean        enqueueMethodForCompilation(Executable method, int compLevel) {
+  public        boolean enqueueMethodForCompilation(Executable method, int compLevel) {
     return enqueueMethodForCompilation(method, compLevel, -1 /*InvocationEntryBci*/);
   }
   public native boolean enqueueMethodForCompilation(Executable method, int compLevel, int entry_bci);
@@ -142,6 +142,8 @@
 
   // Memory
   public native void readReservedMemory();
+  public native long allocateMetaspace(ClassLoader classLoader, long size);
+  public native void freeMetaspace(ClassLoader classLoader, long addr, long size);
 
   // force Full GC
   public native void fullGC();
@@ -154,4 +156,17 @@
   // CPU features
   public native String getCPUFeatures();
 
+  // VM flags
+  public native void    setBooleanVMFlag(String name, boolean value);
+  public native void    setIntxVMFlag(String name, long value);
+  public native void    setUintxVMFlag(String name, long value);
+  public native void    setUint64VMFlag(String name, long value);
+  public native void    setStringVMFlag(String name, String value);
+  public native void    setDoubleVMFlag(String name, double value);
+  public native Boolean getBooleanVMFlag(String name);
+  public native Long    getIntxVMFlag(String name);
+  public native Long    getUintxVMFlag(String name);
+  public native Long    getUint64VMFlag(String name);
+  public native String  getStringVMFlag(String name);
+  public native Double  getDoubleVMFlag(String name);
 }
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary_tests/whitebox/vm_flags/BooleanTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/whitebox/vm_flags/BooleanTest.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test BooleanTest
+ * @bug 8028756
+ * @library /testlibrary /testlibrary/whitebox
+ * @build BooleanTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI BooleanTest
+ * @summary testing of WB::set/getBooleanVMFlag()
+ * @author igor.ignatyev@oracle.com
+ */
+
+import sun.hotspot.WhiteBox;
+import com.oracle.java.testlibrary.*;
+import sun.management.*;
+import com.sun.management.*;
+
+public class BooleanTest {
+    private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+    private static final Boolean[] TESTS = {true, false, true, true, false};
+    private static final String TEST_NAME = "BooleanTest";
+    private static final String FLAG_NAME = "PrintCompilation";
+    private static final String METHOD = TEST_NAME + "::method";
+    private static final String METHOD1 = METHOD + "1";
+    private static final String METHOD2 = METHOD + "2";
+
+    public static void main(String[] args) throws Exception {
+        if (args.length == 0) {
+            VmFlagTest.runTest(FLAG_NAME, TESTS,
+                VmFlagTest.WHITE_BOX::setBooleanVMFlag,
+                VmFlagTest.WHITE_BOX::getBooleanVMFlag);
+            testFunctional(false);
+            testFunctional(true);
+        } else {
+            boolean value = Boolean.valueOf(args[0]);
+            method1();
+            VmFlagTest.WHITE_BOX.setBooleanVMFlag(FLAG_NAME, value);
+            method2();
+        }
+    }
+
+    private static void testFunctional(boolean value) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            "-Xbootclasspath/a:.",
+            "-XX:+UnlockDiagnosticVMOptions",
+            "-XX:+WhiteBoxAPI",
+            "-Xcomp",
+            "-XX:CompileCommand=compileonly," + METHOD + "*",
+            "-XX:" + (value ? "-" : "+") + FLAG_NAME,
+            TEST_NAME,
+            "" + value);
+        OutputAnalyzer out = new OutputAnalyzer(pb.start());
+        if (value) {
+            out.shouldNotContain(METHOD1);
+            out.shouldContain(METHOD2);
+        } else {
+            out.shouldContain(METHOD1);
+            out.shouldNotContain(METHOD2);
+        }
+    }
+
+    private static void method1() { }
+    private static void method2() { }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary_tests/whitebox/vm_flags/DoubleTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/whitebox/vm_flags/DoubleTest.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test DoubleTest
+ * @bug 8028756
+ * @library /testlibrary /testlibrary/whitebox
+ * @build DoubleTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI DoubleTest
+ * @summary testing of WB::set/getDoubleVMFlag()
+ * @author igor.ignatyev@oracle.com
+ */
+
+public class DoubleTest {
+    private static final String FLAG_NAME = null;
+    private static final Double[] TESTS = {0d, -0d, -1d, 1d,
+            Double.MAX_VALUE, Double.MIN_VALUE, Double.NaN,
+            Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY};
+
+    public static void main(String[] args) throws Exception {
+        VmFlagTest.runTest(FLAG_NAME, TESTS,
+            VmFlagTest.WHITE_BOX::setDoubleVMFlag,
+            VmFlagTest.WHITE_BOX::getDoubleVMFlag);
+    }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary_tests/whitebox/vm_flags/IntxTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/whitebox/vm_flags/IntxTest.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test IntxTest
+ * @bug 8028756
+ * @library /testlibrary /testlibrary/whitebox
+ * @build IntxTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI IntxTest
+ * @summary testing of WB::set/getIntxVMFlag()
+ * @author igor.ignatyev@oracle.com
+ */
+
+public class IntxTest {
+    private static final String FLAG_NAME = "OnStackReplacePercentage";
+    private static final Long[] TESTS = {0L, 100L, -1L,
+            (long) Integer.MAX_VALUE, (long) Integer.MIN_VALUE};
+
+    public static void main(String[] args) throws Exception {
+        VmFlagTest.runTest(FLAG_NAME, TESTS,
+            VmFlagTest.WHITE_BOX::setIntxVMFlag,
+            VmFlagTest.WHITE_BOX::getIntxVMFlag);
+    }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary_tests/whitebox/vm_flags/StringTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/whitebox/vm_flags/StringTest.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test StringTest
+ * @bug 8028756
+ * @library /testlibrary /testlibrary/whitebox
+ * @build StringTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI StringTest
+ * @summary testing of WB::set/getStringVMFlag()
+ * @author igor.ignatyev@oracle.com
+ */
+
+public class StringTest {
+    private static final String FLAG_NAME = "CompileOnly";
+    private static final String[] TESTS = {"StringTest::*", ""};
+
+    public static void main(String[] args) throws Exception {
+        VmFlagTest.runTest(FLAG_NAME, TESTS,
+            VmFlagTest.WHITE_BOX::setStringVMFlag,
+            VmFlagTest.WHITE_BOX::getStringVMFlag);
+    }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary_tests/whitebox/vm_flags/Uint64Test.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/whitebox/vm_flags/Uint64Test.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test Uint64Test
+ * @bug 8028756
+ * @library /testlibrary /testlibrary/whitebox
+ * @build Uint64Test
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI Uint64Test
+ * @summary testing of WB::set/getUint64VMFlag()
+ * @author igor.ignatyev@oracle.com
+ */
+
+/** Runs the shared VmFlagTest checks for WB::set/getUint64VMFlag() on "MaxRAM". */
+public class Uint64Test {
+    private static final String FLAG = "MaxRAM";
+    private static final Long[] VALUES = {
+        0L, 100L, (long) Integer.MAX_VALUE, -1L, Long.MAX_VALUE, Long.MIN_VALUE};
+
+    public static void main(String[] args) throws Exception {
+        VmFlagTest.runTest(FLAG, VALUES,
+            VmFlagTest.WHITE_BOX::setUint64VMFlag, VmFlagTest.WHITE_BOX::getUint64VMFlag);
+    }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary_tests/whitebox/vm_flags/UintxTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/whitebox/vm_flags/UintxTest.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test UintxTest
+ * @bug 8028756
+ * @library /testlibrary /testlibrary/whitebox
+ * @build UintxTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm/timeout=600 -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI UintxTest
+ * @summary testing of WB::set/getUintxVMFlag()
+ * @author igor.ignatyev@oracle.com
+ */
+import com.oracle.java.testlibrary.Platform;
+
+/** Runs the shared VmFlagTest checks for WB::set/getUintxVMFlag() on "TypeProfileLevel". */
+public class UintxTest {
+    private static final String FLAG = "TypeProfileLevel";
+    private static final Long[] INPUTS = {
+        0L, 100L, (long) Integer.MAX_VALUE, (1L << 32L) - 1L, 1L << 32L};
+    // Used when Platform.is64bit() is false: the last input, 1L << 32, is expected back as 0.
+    private static final Long[] READ_BACK_32 = {
+        0L, 100L, (long) Integer.MAX_VALUE, (1L << 32L) - 1L, 0L};
+
+    public static void main(String[] args) throws Exception {
+        Long[] readBack = Platform.is64bit() ? INPUTS : READ_BACK_32;
+        VmFlagTest.runTest(FLAG, INPUTS, readBack,
+            VmFlagTest.WHITE_BOX::setUintxVMFlag, VmFlagTest.WHITE_BOX::getUintxVMFlag);
+    }
+}
+
diff -r f06c7b654d63 -r 03c5d509a811 test/testlibrary_tests/whitebox/vm_flags/VmFlagTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/testlibrary_tests/whitebox/vm_flags/VmFlagTest.java	Tue Aug 19 20:41:28 2014 +0100
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.Objects;
+import java.util.function.BiConsumer;
+import java.util.function.Function;
+import sun.hotspot.WhiteBox;
+import sun.management.*;
+import com.sun.management.*;
+import com.oracle.java.testlibrary.*;
+
+/** Checks one VM flag by comparing the WhiteBox accessors with the diagnostic MXBean. */
+public final class VmFlagTest<T> {
+    public static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+    private static final String NONEXISTENT_FLAG = "NonexistentFlag";
+    private final String flagName;
+    private final BiConsumer<T, T> test;     // testPositive or testNegative
+    private final BiConsumer<String, T> set; // writes a value to the named flag
+    private final Function<String, T> get;   // reads the value of the named flag
+
+    /** @param isPositive true selects testPositive, false selects testNegative */
+    protected VmFlagTest(String flagName, BiConsumer<String, T> set,
+            Function<String, T> get, boolean isPositive) {
+        this.flagName = flagName;
+        this.set = set;
+        this.get = get;
+        test = isPositive ? this::testPositive : this::testNegative;
+    }
+
+    private void setNewValue(T value) {
+        set.accept(flagName, value);
+    }
+
+    private T getValue() {
+        T t = get.apply(flagName);
+        System.out.println("T = " + t);
+        return t;
+    }
+
+    /** Same as the five-argument overload with tests doubling as the expected results. */
+    protected static <T> void runTest(String existentFlag, T[] tests,
+            BiConsumer<String, T> set, Function<String, T> get) {
+        runTest(existentFlag, tests, tests, set, get);
+    }
+
+    /** Runs testPositive on existentFlag unless it is null, then testNegative on NONEXISTENT_FLAG. */
+    protected static <T> void runTest(String existentFlag, T[] tests,
+            T[] results, BiConsumer<String, T> set, Function<String, T> get) {
+        if (existentFlag != null) {
+            new VmFlagTest<T>(existentFlag, set, get, true).test(tests, results);
+        }
+        new VmFlagTest<T>(NONEXISTENT_FLAG, set, get, false).test(tests, results);
+    }
+
+    /** Applies the selected check to each (tests[i], results[i]) pair. */
+    public final void test(T[] tests, T[] results) {
+        Asserts.assertEQ(tests.length, results.length, "[TESTBUG] tests.length != results.length");
+        for (int i = 0, n = tests.length ; i < n; ++i) {
+            test.accept(tests[i], results[i]);
+        }
+    }
+
+    /** Returns the MXBean's value for the flag, or null if getVMOption rejects the name. */
+    protected String getVMOptionAsString() {
+        HotSpotDiagnosticMXBean diagnostic = ManagementFactoryHelper.getDiagnosticMXBean();
+        VMOption tmp;
+        try {
+            tmp = diagnostic.getVMOption(flagName);
+        } catch (IllegalArgumentException e) {
+            tmp = null;
+        }
+        return tmp == null ? null : tmp.getValue();
+    }
+
+    /** Sets value; the MXBean must then report expected and agree with the getter. */
+    private void testPositive(T value, T expected) {
+        Asserts.assertEQ(getVMOptionAsString(), asString(getValue()));
+        setNewValue(value);
+        Asserts.assertEQ(getVMOptionAsString(), asString(expected));
+        Asserts.assertEQ(getVMOptionAsString(), asString(getValue()));
+    }
+
+    /** Sets value; the MXBean's report must not change. expected is unused. */
+    private void testNegative(T value, T expected) {
+        String oldValue = getVMOptionAsString();
+        Asserts.assertEQ(oldValue, asString(getValue()));
+        setNewValue(value);
+        Asserts.assertEQ(oldValue, getVMOptionAsString());
+    }
+
+    private String asString(Object value) {
+        return value == null ? null : "" + value;
+    }
+}
+