changeset 20320:897333c7e587 hs25.40-b05

Merge
author amurillo
date Thu, 14 Aug 2014 12:55:30 -0700
parents afac3987537e (current diff) 29a5c2fd2d2e (diff)
children f52cb9164759
files
diffstat 192 files changed, 7208 insertions(+), 2779 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegion.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegion.java	Thu Aug 14 12:55:30 2014 -0700
@@ -24,23 +24,26 @@
 
 package sun.jvm.hotspot.gc_implementation.g1;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Observable;
 import java.util.Observer;
-
 import sun.jvm.hotspot.debugger.Address;
-import sun.jvm.hotspot.memory.ContiguousSpace;
+import sun.jvm.hotspot.memory.CompactibleSpace;
+import sun.jvm.hotspot.memory.MemRegion;
 import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.types.AddressField;
 import sun.jvm.hotspot.types.CIntegerField;
 import sun.jvm.hotspot.types.Type;
 import sun.jvm.hotspot.types.TypeDataBase;
 
 // Mirror class for HeapRegion. Currently we don't actually include
-// any of its fields but only iterate over it (which we get "for free"
-// as HeapRegion ultimately inherits from ContiguousSpace).
+// any of its fields but only iterate over it.
 
-public class HeapRegion extends ContiguousSpace {
+public class HeapRegion extends CompactibleSpace {
     // static int GrainBytes;
     static private CIntegerField grainBytesField;
+    static private AddressField topField;
 
     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -54,6 +57,8 @@
         Type type = db.lookupType("HeapRegion");
 
         grainBytesField = type.getCIntegerField("GrainBytes");
+        topField = type.getAddressField("_top");
+
     }
 
     static public long grainBytes() {
@@ -63,4 +68,25 @@
     public HeapRegion(Address addr) {
         super(addr);
     }
+
+    public Address top() {
+        return topField.getValue(addr);
+    }
+
+    @Override
+    public List getLiveRegions() {
+        List res = new ArrayList();
+        res.add(new MemRegion(bottom(), top()));
+        return res;
+    }
+
+    @Override
+    public long used() {
+        return top().minus(bottom());
+    }
+
+    @Override
+    public long free() {
+        return end().minus(top());
+    }
 }
--- a/make/hotspot_version	Wed Aug 13 14:49:46 2014 -0700
+++ b/make/hotspot_version	Thu Aug 14 12:55:30 2014 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=40
-HS_BUILD_NUMBER=04
+HS_BUILD_NUMBER=05
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/src/cpu/ppc/vm/compiledIC_ppc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/ppc/vm/compiledIC_ppc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -50,34 +50,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 // A PPC CompiledStaticCall looks like this:
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -123,6 +123,7 @@
     fpop2_op3    = 0x35,
     impdep1_op3  = 0x36,
     aes3_op3     = 0x36,
+    sha_op3      = 0x36,
     alignaddr_op3  = 0x36,
     faligndata_op3 = 0x36,
     flog3_op3    = 0x36,
@@ -223,7 +224,11 @@
     mwtos_opf          = 0x119,
 
     aes_kexpand0_opf   = 0x130,
-    aes_kexpand2_opf   = 0x131
+    aes_kexpand2_opf   = 0x131,
+
+    sha1_opf           = 0x141,
+    sha256_opf         = 0x142,
+    sha512_opf         = 0x143
   };
 
   enum op5s {
@@ -595,6 +600,11 @@
   // AES crypto instructions supported only on certain processors
   static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
 
+  // SHA crypto instructions supported only on certain processors
+  static void sha1_only()   { assert( VM_Version::has_sha1(),   "This instruction only works on SPARC with SHA1"); }
+  static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); }
+  static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); }
+
   // instruction only in VIS1
   static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
 
@@ -1179,7 +1189,6 @@
                                                u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
   inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
 
-
   //  VIS1 instructions
 
   void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); }
@@ -1203,6 +1212,12 @@
   void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
   void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
 
+  // Crypto SHA instructions
+
+  void sha1()   { sha1_only();    emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
+  void sha256() { sha256_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); }
+  void sha512() { sha512_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); }
+
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef CHECK_DELAY
--- a/src/cpu/sparc/vm/compiledIC_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/compiledIC_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -50,34 +50,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
--- a/src/cpu/sparc/vm/sparc.ad	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Thu Aug 14 12:55:30 2014 -0700
@@ -6184,7 +6184,11 @@
   ins_cost(DEFAULT_COST * 3/2);
   format %{ "SET    $con,$dst\t! non-oop ptr" %}
   ins_encode %{
-    __ set($con$$constant, $dst$$Register);
+    if (_opnds[1]->constant_reloc() == relocInfo::metadata_type) {
+      __ set_metadata_constant((Metadata*)$con$$constant, $dst$$Register);
+    } else {
+      __ set($con$$constant, $dst$$Register);
+    }
   %}
   ins_pipe(loadConP);
 %}
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -4575,6 +4575,219 @@
     return start;
   }
 
+  address generate_sha1_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_sha1_loop, L_sha1_unaligned_input, L_sha1_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // int[]  SHA.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load state into F0-F4
+    for (i = 0; i < 5; i++) {
+      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
+    }
+
+    __ andcc(buf, 7, G0);
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha1_unaligned_input);
+    __ delayed()->nop();
+
+    __ BIND(L_sha1_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    __ sha1();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F4 into state and return
+    for (i = 0; i < 4; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
+
+    __ BIND(L_sha1_unaligned_input);
+    __ alignaddr(buf, G0, buf);
+
+    __ BIND(L_sha1_unaligned_input_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 9; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    for (i = 0; i < 8; i++) {
+      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
+    }
+    __ sha1();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F4 into state and return
+    for (i = 0; i < 4; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
+
+    return start;
+  }
+
+  address generate_sha256_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_sha256_loop, L_sha256_unaligned_input, L_sha256_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // int[]  SHA2.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load state into F0-F7
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
+    }
+
+    __ andcc(buf, 7, G0);
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha256_unaligned_input);
+    __ delayed()->nop();
+
+    __ BIND(L_sha256_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    __ sha256();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F7 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
+
+    __ BIND(L_sha256_unaligned_input);
+    __ alignaddr(buf, G0, buf);
+
+    __ BIND(L_sha256_unaligned_input_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 9; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    for (i = 0; i < 8; i++) {
+      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
+    }
+    __ sha256();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F7 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
+
+    return start;
+  }
+
+  address generate_sha512_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_sha512_loop, L_sha512_unaligned_input, L_sha512_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // long[] SHA5.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load state into F0-F14
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, state, i*8, as_FloatRegister(i*2));
+    }
+
+    __ andcc(buf, 7, G0);
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha512_unaligned_input);
+    __ delayed()->nop();
+
+    __ BIND(L_sha512_loop);
+    // load buf into F16-F46
+    for (i = 0; i < 16; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
+    }
+    __ sha512();
+    if (multi_block) {
+      __ add(ofs, 128, ofs);
+      __ add(buf, 128, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F14 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
+
+    __ BIND(L_sha512_unaligned_input);
+    __ alignaddr(buf, G0, buf);
+
+    __ BIND(L_sha512_unaligned_input_loop);
+    // load buf into F16-F46
+    for (i = 0; i < 17; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
+    }
+    for (i = 0; i < 16; i++) {
+      __ faligndata(as_FloatRegister(i*2 + 16), as_FloatRegister(i*2 + 18), as_FloatRegister(i*2 + 16));
+    }
+    __ sha512();
+    if (multi_block) {
+      __ add(ofs, 128, ofs);
+      __ add(buf, 128, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F14 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
+
+    return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -4647,6 +4860,20 @@
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
+
+    // generate SHA1/SHA256/SHA512 intrinsics code
+    if (UseSHA1Intrinsics) {
+      StubRoutines::_sha1_implCompress     = generate_sha1_implCompress(false,   "sha1_implCompress");
+      StubRoutines::_sha1_implCompressMB   = generate_sha1_implCompress(true,    "sha1_implCompressMB");
+    }
+    if (UseSHA256Intrinsics) {
+      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
+    }
+    if (UseSHA512Intrinsics) {
+      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
+      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true,  "sha512_implCompressMB");
+    }
   }
 
 
--- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -41,7 +41,7 @@
 enum /* platform_dependent_constants */ {
   // %%%%%%%% May be able to shrink this a lot
   code_size1 = 20000,           // simply increase if too small (assembler will crash if too small)
-  code_size2 = 22000            // simply increase if too small (assembler will crash if too small)
+  code_size2 = 23000            // simply increase if too small (assembler will crash if too small)
 };
 
 class Sparc {
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -234,7 +234,7 @@
   assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
 
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
                (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", vis1" : ""),
@@ -243,6 +243,9 @@
                (has_blk_init() ? ", blk_init" : ""),
                (has_cbcond() ? ", cbcond" : ""),
                (has_aes() ? ", aes" : ""),
+               (has_sha1() ? ", sha1" : ""),
+               (has_sha256() ? ", sha256" : ""),
+               (has_sha512() ? ", sha512" : ""),
                (is_ultra3() ? ", ultra3" : ""),
                (is_sun4v() ? ", sun4v" : ""),
                (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
@@ -301,6 +304,58 @@
     }
   }
 
+  // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
+  if (has_sha1() || has_sha256() || has_sha512()) {
+    if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
+      if (FLAG_IS_DEFAULT(UseSHA)) {
+        FLAG_SET_DEFAULT(UseSHA, true);
+      }
+    } else {
+      if (UseSHA) {
+        warning("SPARC SHA intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
+        FLAG_SET_DEFAULT(UseSHA, false);
+      }
+    }
+  } else if (UseSHA) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
+  if (!UseSHA) {
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  } else {
+    if (has_sha1()) {
+      if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+      }
+    } else if (UseSHA1Intrinsics) {
+      warning("SHA1 instruction is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    }
+    if (has_sha256()) {
+      if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+      }
+    } else if (UseSHA256Intrinsics) {
+      warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    }
+
+    if (has_sha512()) {
+      if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+      }
+    } else if (UseSHA512Intrinsics) {
+      warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+    }
+    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA, false);
+    }
+  }
+
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,10 @@
     T_family             = 16,
     T1_model             = 17,
     sparc5_instructions  = 18,
-    aes_instructions     = 19
+    aes_instructions     = 19,
+    sha1_instruction     = 20,
+    sha256_instruction   = 21,
+    sha512_instruction   = 22
   };
 
   enum Feature_Flag_Set {
@@ -77,6 +80,9 @@
     T1_model_m              = 1 << T1_model,
     sparc5_instructions_m   = 1 << sparc5_instructions,
     aes_instructions_m      = 1 << aes_instructions,
+    sha1_instruction_m      = 1 << sha1_instruction,
+    sha256_instruction_m    = 1 << sha256_instruction,
+    sha512_instruction_m    = 1 << sha512_instruction,
 
     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -129,6 +135,9 @@
   static bool has_cbcond()              { return (_features & cbcond_instructions_m) != 0; }
   static bool has_sparc5_instr()        { return (_features & sparc5_instructions_m) != 0; }
   static bool has_aes()                 { return (_features & aes_instructions_m) != 0; }
+  static bool has_sha1()                { return (_features & sha1_instruction_m) != 0; }
+  static bool has_sha256()              { return (_features & sha256_instruction_m) != 0; }
+  static bool has_sha512()              { return (_features & sha512_instruction_m) != 0; }
 
   static bool supports_compare_and_exchange()
                                         { return has_v9(); }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -3854,6 +3854,15 @@
 }
 
 // Carry-Less Multiplication Quadword
+void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
+  assert(VM_Version::supports_clmul(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  emit_int8(0x44);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)mask);
+}
+
+// Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
   bool vector256 = false;
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1837,6 +1837,7 @@
   void vpbroadcastd(XMMRegister dst, XMMRegister src);
 
   // Carry-Less Multiplication Quadword
+  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
 
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
--- a/src/cpu/x86/vm/compiledIC_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/compiledIC_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -47,34 +47,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
--- a/src/cpu/x86/vm/globals_x86.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -130,16 +130,16 @@
           "Use fast-string operation for zeroing: rep stosb")               \
                                                                             \
   /* Use Restricted Transactional Memory for lock eliding */                \
-  experimental(bool, UseRTMLocking, false,                                  \
+  product(bool, UseRTMLocking, false,                                       \
           "Enable RTM lock eliding for inflated locks in compiled code")    \
                                                                             \
   experimental(bool, UseRTMForStackLocks, false,                            \
           "Enable RTM lock eliding for stack locks in compiled code")       \
                                                                             \
-  experimental(bool, UseRTMDeopt, false,                                    \
+  product(bool, UseRTMDeopt, false,                                         \
           "Perform deopt and recompilation based on RTM abort ratio")       \
                                                                             \
-  experimental(uintx, RTMRetryCount, 5,                                     \
+  product(uintx, RTMRetryCount, 5,                                          \
           "Number of RTM retries on lock abort or busy")                    \
                                                                             \
   experimental(intx, RTMSpinLoopCount, 100,                                 \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -7316,17 +7316,34 @@
  * Fold 128-bit data chunk
  */
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
-  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
-  vpclmulldq(xcrc, xK, xcrc); // [63:0]
-  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc); // [123:64]
+    vpclmulldq(xcrc, xK, xcrc); // [63:0]
+    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK);   // [123:64]
+    pclmulldq(xcrc, xK);   // [63:0]
+    pxor(xcrc, xtmp);
+    movdqu(xtmp, Address(buf, offset));
+    pxor(xcrc, xtmp);
+  }
 }
 
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
-  vpclmulhdq(xtmp, xK, xcrc);
-  vpclmulldq(xcrc, xK, xcrc);
-  pxor(xcrc, xbuf);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc);
+    vpclmulldq(xcrc, xK, xcrc);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK);
+    pclmulldq(xcrc, xK);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  }
 }
 
 /**
@@ -7444,9 +7461,17 @@
   // Fold 128 bits in xmm1 down into 32 bits in crc register.
   BIND(L_fold_128b);
   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
-  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
-  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
-  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  if (UseAVX > 0) {
+    vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
+    vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+    vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  } else {
+    movdqa(xmm2, xmm0);
+    pclmulqdq(xmm2, xmm1, 0x1);
+    movdqa(xmm3, xmm0);
+    pand(xmm3, xmm2);
+    pclmulqdq(xmm0, xmm3, 0x1);
+  }
   psrldq(xmm1, 8);
   psrldq(xmm2, 4);
   pxor(xmm0, xmm1);
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -966,6 +966,16 @@
   void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
   void mulss(XMMRegister dst, AddressLiteral src);
 
+  // Carry-Less Multiplication Quadword
+  void pclmulldq(XMMRegister dst, XMMRegister src) {
+    // 0x00 - multiply lower 64 bits [0:63]
+    Assembler::pclmulqdq(dst, src, 0x00);
+  }
+  void pclmulhdq(XMMRegister dst, XMMRegister src) {
+    // 0x11 - multiply upper 64 bits [64:127]
+    Assembler::pclmulqdq(dst, src, 0x11);
+  }
+
   void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, AddressLiteral src);
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -568,7 +568,7 @@
     FLAG_SET_DEFAULT(UseCLMUL, false);
   }
 
-  if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) {
+  if (UseCLMUL && (UseSSE > 2)) {
     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
       UseCRC32Intrinsics = true;
     }
@@ -590,6 +590,17 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
+  if (UseSHA) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
+    warning("SHA intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  }
+
   // Adjust RTM (Restricted Transactional Memory) flags
   if (!supports_rtm() && UseRTMLocking) {
     // Can't continue because UseRTMLocking affects UseBiasedLocking flag
@@ -803,6 +814,21 @@
         }
       }
     }
+    if ((cpu_family() == 0x06) &&
+        ((extended_cpu_model() == 0x36) || // Centerton
+         (extended_cpu_model() == 0x37) || // Silvermont
+         (extended_cpu_model() == 0x4D))) {
+#ifdef COMPILER2
+      if (FLAG_IS_DEFAULT(OptoScheduling)) {
+        OptoScheduling = true;
+      }
+#endif
+      if (supports_sse4_2()) { // Silvermont
+        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
+    }
   }
 
   // Use count leading zeros count instruction if available.
@@ -890,23 +916,25 @@
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
-    if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
+  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
+    if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
 #ifdef _LP64
       AllocatePrefetchDistance = 384;
 #else
       AllocatePrefetchDistance = 320;
 #endif
     }
-    if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
+    if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
       AllocatePrefetchDistance = 192;
       AllocatePrefetchLines = 4;
+    }
 #ifdef COMPILER2
-      if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+    if (supports_sse4_2()) {
+      if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
       }
+    }
 #endif
-    }
   }
   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
 
--- a/src/cpu/zero/vm/compiledIC_zero.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/zero/vm/compiledIC_zero.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -58,34 +58,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -137,6 +137,21 @@
 #endif
     if (av & AV_SPARC_AES)       features |= aes_instructions_m;
 
+#ifndef AV_SPARC_SHA1
+#define AV_SPARC_SHA1   0x00400000  /* sha1 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA1)         features |= sha1_instruction_m;
+
+#ifndef AV_SPARC_SHA256
+#define AV_SPARC_SHA256 0x00800000  /* sha256 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA256)       features |= sha256_instruction_m;
+
+#ifndef AV_SPARC_SHA512
+#define AV_SPARC_SHA512 0x01000000  /* sha512 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA512)       features |= sha512_instruction_m;
+
   } else {
     // getisax(2) failed, use the old legacy code.
 #ifndef PRODUCT
--- a/src/share/vm/c1/c1_Runtime1.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1018,6 +1018,7 @@
               n_copy->set_data((intx) (load_klass()));
             } else {
               assert(mirror() != NULL, "klass not set");
+              // Don't need a G1 pre-barrier here since we assert above that data isn't an oop.
               n_copy->set_data(cast_from_oop<intx>(mirror()));
             }
 
--- a/src/share/vm/ci/ciEnv.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciEnv.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -184,6 +184,10 @@
     }
   }
 
+  void ensure_metadata_alive(ciMetadata* m) {
+    _factory->ensure_metadata_alive(m);
+  }
+
   ciInstance* get_instance(oop o) {
     if (o == NULL) return NULL;
     return get_object(o)->as_instance();
--- a/src/share/vm/ci/ciKlass.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciKlass.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -43,6 +43,7 @@
   friend class ciMethod;
   friend class ciMethodData;
   friend class ciObjArrayKlass;
+  friend class ciReceiverTypeData;
 
 private:
   ciSymbol* _name;
--- a/src/share/vm/ci/ciMethodData.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciMethodData.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -170,6 +170,7 @@
     Klass* k = data->as_ReceiverTypeData()->receiver(row);
     if (k != NULL) {
       ciKlass* klass = CURRENT_ENV->get_klass(k);
+      CURRENT_ENV->ensure_metadata_alive(klass);
       set_receiver(row, klass);
     }
   }
@@ -191,6 +192,7 @@
 void ciSpeculativeTrapData::translate_from(const ProfileData* data) {
   Method* m = data->as_SpeculativeTrapData()->method();
   ciMethod* ci_m = CURRENT_ENV->get_method(m);
+  CURRENT_ENV->ensure_metadata_alive(ci_m);
   set_method(ci_m);
 }
 
--- a/src/share/vm/ci/ciMethodData.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciMethodData.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -70,6 +70,7 @@
     Klass* v = TypeEntries::valid_klass(k);
     if (v != NULL) {
       ciKlass* klass = CURRENT_ENV->get_klass(v);
+      CURRENT_ENV->ensure_metadata_alive(klass);
       return with_status(klass, k);
     }
     return with_status(NULL, k);
--- a/src/share/vm/ci/ciObjectFactory.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -46,6 +46,9 @@
 #include "oops/oop.inline.hpp"
 #include "oops/oop.inline2.hpp"
 #include "runtime/fieldType.hpp"
+#if INCLUDE_ALL_GCS
+# include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
 
 // ciObjectFactory
 //
@@ -374,6 +377,37 @@
   return NULL;
 }
 
+// ------------------------------------------------------------------
+// ciObjectFactory::ensure_metadata_alive
+//
+// Ensure that the metadata wrapped by the ciMetadata is kept alive by GC.
+// This is primarily useful for metadata which is considered as weak roots
+// by the GC but need to be strong roots if reachable from a current compilation.
+//
+void ciObjectFactory::ensure_metadata_alive(ciMetadata* m) {
+  ASSERT_IN_VM; // We're handling raw oops here.
+
+#if INCLUDE_ALL_GCS
+  if (!UseG1GC) {
+    return;
+  }
+  Klass* metadata_owner_klass;
+  if (m->is_klass()) {
+    metadata_owner_klass = m->as_klass()->get_Klass();
+  } else if (m->is_method()) {
+    metadata_owner_klass = m->as_method()->get_Method()->constants()->pool_holder();
+  } else {
+    fatal("Not implemented for other types of metadata");
+  }
+
+  oop metadata_holder = metadata_owner_klass->klass_holder();
+  if (metadata_holder != NULL) {
+    G1SATBCardTableModRefBS::enqueue(metadata_holder);
+  }
+
+#endif
+}
+
 //------------------------------------------------------------------
 // ciObjectFactory::get_unloaded_method
 //
--- a/src/share/vm/ci/ciObjectFactory.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciObjectFactory.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -75,6 +75,8 @@
   ciObject* create_new_object(oop o);
   ciMetadata* create_new_object(Metadata* o);
 
+  void ensure_metadata_alive(ciMetadata* m);
+
   static bool is_equal(NonPermObject* p, oop key) {
     return p->object()->get_oop() == key;
   }
--- a/src/share/vm/classfile/classLoaderData.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/classLoaderData.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -73,7 +73,11 @@
 
 ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous, Dependencies dependencies) :
   _class_loader(h_class_loader()),
-  _is_anonymous(is_anonymous), _keep_alive(is_anonymous), // initially
+  _is_anonymous(is_anonymous),
+  // An anonymous class loader data doesn't have anything to keep
+  // it from being unloaded during parsing of the anonymous class.
+  // The null-class-loader should always be kept alive.
+  _keep_alive(is_anonymous || h_class_loader.is_null()),
   _metaspace(NULL), _unloading(false), _klasses(NULL),
   _claimed(0), _jmethod_ids(NULL), _handles(NULL), _deallocate_list(NULL),
   _next(NULL), _dependencies(dependencies),
@@ -317,12 +321,45 @@
   }
 }
 
+#ifdef ASSERT
+class AllAliveClosure : public OopClosure {
+  BoolObjectClosure* _is_alive_closure;
+  bool _found_dead;
+ public:
+  AllAliveClosure(BoolObjectClosure* is_alive_closure) : _is_alive_closure(is_alive_closure), _found_dead(false) {}
+  template <typename T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      if (!_is_alive_closure->do_object_b(obj)) {
+        _found_dead = true;
+      }
+    }
+  }
+  void do_oop(oop* p)       { do_oop_work<oop>(p); }
+  void do_oop(narrowOop* p) { do_oop_work<narrowOop>(p); }
+  bool found_dead()         { return _found_dead; }
+};
+#endif
+
+oop ClassLoaderData::keep_alive_object() const {
+  assert(!keep_alive(), "Don't use with CLDs that are artificially kept alive");
+  return is_anonymous() ? _klasses->java_mirror() : class_loader();
+}
+
 bool ClassLoaderData::is_alive(BoolObjectClosure* is_alive_closure) const {
-  bool alive =
-    is_anonymous() ?
-       is_alive_closure->do_object_b(_klasses->java_mirror()) :
-       class_loader() == NULL || is_alive_closure->do_object_b(class_loader());
-  assert(!alive || claimed(), "must be claimed");
+  bool alive = keep_alive() // null class loader and incomplete anonymous klasses.
+      || is_alive_closure->do_object_b(keep_alive_object());
+
+#ifdef ASSERT
+  if (alive) {
+    AllAliveClosure all_alive_closure(is_alive_closure);
+    KlassToOopClosure klass_closure(&all_alive_closure);
+    const_cast<ClassLoaderData*>(this)->oops_do(&all_alive_closure, &klass_closure, false);
+    assert(!all_alive_closure.found_dead(), err_msg("Found dead oop in alive cld: " PTR_FORMAT, p2i(this)));
+  }
+#endif
+
   return alive;
 }
 
@@ -601,11 +638,36 @@
 
 void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
   if (ClassUnloading) {
-    ClassLoaderData::the_null_class_loader_data()->oops_do(f, klass_closure, must_claim);
-    // keep any special CLDs alive.
-    ClassLoaderDataGraph::keep_alive_oops_do(f, klass_closure, must_claim);
+    keep_alive_oops_do(f, klass_closure, must_claim);
   } else {
-    ClassLoaderDataGraph::oops_do(f, klass_closure, must_claim);
+    oops_do(f, klass_closure, must_claim);
+  }
+}
+
+void ClassLoaderDataGraph::cld_do(CLDClosure* cl) {
+  for (ClassLoaderData* cld = _head; cl != NULL && cld != NULL; cld = cld->next()) {
+    cl->do_cld(cld);
+  }
+}
+
+void ClassLoaderDataGraph::roots_cld_do(CLDClosure* strong, CLDClosure* weak) {
+  for (ClassLoaderData* cld = _head;  cld != NULL; cld = cld->_next) {
+    CLDClosure* closure = cld->keep_alive() ? strong : weak;
+    if (closure != NULL) {
+      closure->do_cld(cld);
+    }
+  }
+}
+
+void ClassLoaderDataGraph::keep_alive_cld_do(CLDClosure* cl) {
+  roots_cld_do(cl, NULL);
+}
+
+void ClassLoaderDataGraph::always_strong_cld_do(CLDClosure* cl) {
+  if (ClassUnloading) {
+    keep_alive_cld_do(cl);
+  } else {
+    cld_do(cl);
   }
 }
 
@@ -660,6 +722,16 @@
   return array;
 }
 
+bool ClassLoaderDataGraph::unload_list_contains(const void* x) {
+  assert(SafepointSynchronize::is_at_safepoint(), "only safe to call at safepoint");
+  for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) {
+    if (cld->metaspace_or_null() != NULL && cld->metaspace_or_null()->contains(x)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 #ifndef PRODUCT
 bool ClassLoaderDataGraph::contains_loader_data(ClassLoaderData* loader_data) {
   for (ClassLoaderData* data = _head; data != NULL; data = data->next()) {
@@ -689,7 +761,7 @@
   bool has_redefined_a_class = JvmtiExport::has_redefined_a_class();
   MetadataOnStackMark md_on_stack;
   while (data != NULL) {
-    if (data->keep_alive() || data->is_alive(is_alive_closure)) {
+    if (data->is_alive(is_alive_closure)) {
       if (has_redefined_a_class) {
         data->classes_do(InstanceKlass::purge_previous_versions);
       }
@@ -780,6 +852,60 @@
   return _rw_metaspace;
 }
 
+ClassLoaderDataGraphKlassIteratorAtomic::ClassLoaderDataGraphKlassIteratorAtomic()
+    : _next_klass(NULL) {
+  ClassLoaderData* cld = ClassLoaderDataGraph::_head;
+  Klass* klass = NULL;
+
+  // Find the first klass in the CLDG.
+  while (cld != NULL) {
+    klass = cld->_klasses;
+    if (klass != NULL) {
+      _next_klass = klass;
+      return;
+    }
+    cld = cld->next();
+  }
+}
+
+Klass* ClassLoaderDataGraphKlassIteratorAtomic::next_klass_in_cldg(Klass* klass) {
+  Klass* next = klass->next_link();
+  if (next != NULL) {
+    return next;
+  }
+
+  // No more klasses in the current CLD. Time to find a new CLD.
+  ClassLoaderData* cld = klass->class_loader_data();
+  while (next == NULL) {
+    cld = cld->next();
+    if (cld == NULL) {
+      break;
+    }
+    next = cld->_klasses;
+  }
+
+  return next;
+}
+
+Klass* ClassLoaderDataGraphKlassIteratorAtomic::next_klass() {
+  Klass* head = (Klass*)_next_klass;
+
+  while (head != NULL) {
+    Klass* next = next_klass_in_cldg(head);
+
+    Klass* old_head = (Klass*)Atomic::cmpxchg_ptr(next, &_next_klass, head);
+
+    if (old_head == head) {
+      return head; // Won the CAS.
+    }
+
+    head = old_head;
+  }
+
+  // Nothing more for the iterator to hand out.
+  assert(head == NULL, err_msg("head is " PTR_FORMAT ", expected not null:", p2i(head)));
+  return NULL;
+}
 
 ClassLoaderDataGraphMetaspaceIterator::ClassLoaderDataGraphMetaspaceIterator() {
   _data = ClassLoaderDataGraph::_head;
--- a/src/share/vm/classfile/classLoaderData.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/classLoaderData.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -31,7 +31,6 @@
 #include "memory/metaspaceCounters.hpp"
 #include "runtime/mutex.hpp"
 #include "utilities/growableArray.hpp"
-
 #if INCLUDE_TRACE
 # include "utilities/ticks.hpp"
 #endif
@@ -59,6 +58,7 @@
 class ClassLoaderDataGraph : public AllStatic {
   friend class ClassLoaderData;
   friend class ClassLoaderDataGraphMetaspaceIterator;
+  friend class ClassLoaderDataGraphKlassIteratorAtomic;
   friend class VMStructs;
  private:
   // All CLDs (except the null CLD) can be reached by walking _head->_next->...
@@ -75,9 +75,16 @@
   static ClassLoaderData* find_or_create(Handle class_loader, TRAPS);
   static void purge();
   static void clear_claimed_marks();
+  // oops do
   static void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
+  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
   static void always_strong_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
-  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
+  // cld do
+  static void cld_do(CLDClosure* cl);
+  static void roots_cld_do(CLDClosure* strong, CLDClosure* weak);
+  static void keep_alive_cld_do(CLDClosure* cl);
+  static void always_strong_cld_do(CLDClosure* cl);
+  // klass do
   static void classes_do(KlassClosure* klass_closure);
   static void classes_do(void f(Klass* const));
   static void loaded_classes_do(KlassClosure* klass_closure);
@@ -102,6 +109,7 @@
   static void dump() { dump_on(tty); }
   static void verify();
 
+  static bool unload_list_contains(const void* x);
 #ifndef PRODUCT
   static bool contains_loader_data(ClassLoaderData* loader_data);
 #endif
@@ -134,6 +142,7 @@
   };
 
   friend class ClassLoaderDataGraph;
+  friend class ClassLoaderDataGraphKlassIteratorAtomic;
   friend class ClassLoaderDataGraphMetaspaceIterator;
   friend class MetaDataFactory;
   friend class Method;
@@ -149,7 +158,7 @@
                            // classes in the class loader are allocated.
   Mutex* _metaspace_lock;  // Locks the metaspace for allocations and setup.
   bool _unloading;         // true if this class loader goes away
-  bool _keep_alive;        // if this CLD can be unloaded for anonymous loaders
+  bool _keep_alive;        // if this CLD is kept alive without a keep_alive_object().
   bool _is_anonymous;      // if this CLD is for an anonymous class
   volatile int _claimed;   // true if claimed, for example during GC traces.
                            // To avoid applying oop closure more than once.
@@ -195,7 +204,6 @@
 
   void unload();
   bool keep_alive() const       { return _keep_alive; }
-  bool is_alive(BoolObjectClosure* is_alive_closure) const;
   void classes_do(void f(Klass*));
   void loaded_classes_do(KlassClosure* klass_closure);
   void classes_do(void f(InstanceKlass*));
@@ -207,6 +215,9 @@
   MetaWord* allocate(size_t size);
 
  public:
+
+  bool is_alive(BoolObjectClosure* is_alive_closure) const;
+
   // Accessors
   Metaspace* metaspace_or_null() const     { return _metaspace; }
 
@@ -240,13 +251,16 @@
 
   oop class_loader() const      { return _class_loader; }
 
+  // The object the GC is using to keep this ClassLoaderData alive.
+  oop keep_alive_object() const;
+
   // Returns true if this class loader data is for a loader going away.
   bool is_unloading() const     {
     assert(!(is_the_null_class_loader_data() && _unloading), "The null class loader can never be unloaded");
     return _unloading;
   }
-  // Anonymous class loader data doesn't have anything to keep them from
-  // being unloaded during parsing the anonymous class.
+
+  // Used to make sure that this CLD is not unloaded.
   void set_keep_alive(bool value) { _keep_alive = value; }
 
   unsigned int identity_hash() {
@@ -287,6 +301,16 @@
   void initialize_shared_metaspaces();
 };
 
+// An iterator that distributes Klasses to parallel worker threads.
+class ClassLoaderDataGraphKlassIteratorAtomic : public StackObj {
+  volatile Klass* _next_klass;
+ public:
+  ClassLoaderDataGraphKlassIteratorAtomic();
+  Klass* next_klass();
+ private:
+  static Klass* next_klass_in_cldg(Klass* klass);
+};
+
 class ClassLoaderDataGraphMetaspaceIterator : public StackObj {
   ClassLoaderData* _data;
  public:
--- a/src/share/vm/classfile/dictionary.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/dictionary.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -199,6 +199,26 @@
   return class_was_unloaded;
 }
 
+void Dictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  // Skip the strong roots probe marking if the closures are the same.
+  if (strong == weak) {
+    oops_do(strong);
+    return;
+  }
+
+  for (int index = 0; index < table_size(); index++) {
+    for (DictionaryEntry *probe = bucket(index);
+                          probe != NULL;
+                          probe = probe->next()) {
+      Klass* e = probe->klass();
+      ClassLoaderData* loader_data = probe->loader_data();
+      if (is_strongly_reachable(loader_data, e)) {
+        probe->set_strongly_reachable();
+      }
+    }
+  }
+  _pd_cache_table->roots_oops_do(strong, weak);
+}
 
 void Dictionary::always_strong_oops_do(OopClosure* blk) {
   // Follow all system classes and temporary placeholders in dictionary; only
@@ -490,6 +510,23 @@
   }
 }
 
+void ProtectionDomainCacheTable::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  for (int index = 0; index < table_size(); index++) {
+    for (ProtectionDomainCacheEntry* probe = bucket(index);
+                                     probe != NULL;
+                                     probe = probe->next()) {
+      if (probe->is_strongly_reachable()) {
+        probe->reset_strongly_reachable();
+        probe->oops_do(strong);
+      } else {
+        if (weak != NULL) {
+          probe->oops_do(weak);
+        }
+      }
+    }
+  }
+}
+
 uint ProtectionDomainCacheTable::bucket_size() {
   return sizeof(ProtectionDomainCacheEntry);
 }
--- a/src/share/vm/classfile/dictionary.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/dictionary.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -89,6 +89,7 @@
   // GC support
   void oops_do(OopClosure* f);
   void always_strong_oops_do(OopClosure* blk);
+  void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   void always_strong_classes_do(KlassClosure* closure);
 
@@ -218,6 +219,7 @@
   // GC support
   void oops_do(OopClosure* f);
   void always_strong_oops_do(OopClosure* f);
+  void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   static uint bucket_size();
 
--- a/src/share/vm/classfile/javaClasses.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -463,12 +463,11 @@
   return true;
 }
 
-void java_lang_String::print(Handle java_string, outputStream* st) {
-  oop          obj    = java_string();
-  assert(obj->klass() == SystemDictionary::String_klass(), "must be java_string");
-  typeArrayOop value  = java_lang_String::value(obj);
-  int          offset = java_lang_String::offset(obj);
-  int          length = java_lang_String::length(obj);
+void java_lang_String::print(oop java_string, outputStream* st) {
+  assert(java_string->klass() == SystemDictionary::String_klass(), "must be java_string");
+  typeArrayOop value  = java_lang_String::value(java_string);
+  int          offset = java_lang_String::offset(java_string);
+  int          length = java_lang_String::length(java_string);
 
   int end = MIN2(length, 100);
   if (value == NULL) {
--- a/src/share/vm/classfile/javaClasses.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/javaClasses.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -208,7 +208,7 @@
   }
 
   // Debugging
-  static void print(Handle java_string, outputStream* st);
+  static void print(oop java_string, outputStream* st);
   friend class JavaClasses;
 };
 
--- a/src/share/vm/classfile/metadataOnStackMark.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/metadataOnStackMark.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -47,8 +47,11 @@
   if (_marked_objects == NULL) {
     _marked_objects = new (ResourceObj::C_HEAP, mtClass) GrowableArray<Metadata*>(1000, true);
   }
+
   Threads::metadata_do(Metadata::mark_on_stack);
-  CodeCache::alive_nmethods_do(nmethod::mark_on_stack);
+  if (JvmtiExport::has_redefined_a_class()) {
+    CodeCache::alive_nmethods_do(nmethod::mark_on_stack);
+  }
   CompileBroker::mark_on_stack();
   JvmtiCurrentBreakpoints::metadata_do(Metadata::mark_on_stack);
   ThreadService::metadata_do(Metadata::mark_on_stack);
--- a/src/share/vm/classfile/symbolTable.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/symbolTable.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -36,6 +36,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "utilities/hashtable.inline.hpp"
 #if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #endif
 
@@ -704,11 +705,26 @@
   return lookup(chars, length);
 }
 
+// Tell the GC that this string was looked up in the StringTable.
+static void ensure_string_alive(oop string) {
+  // A lookup in the StringTable could return an object that was previously
+  // considered dead. The SATB part of G1 needs to get notified about this
+  // potential resurrection, otherwise the marking might not find the object.
+#if INCLUDE_ALL_GCS
+  if (UseG1GC && string != NULL) {
+    G1SATBCardTableModRefBS::enqueue(string);
+  }
+#endif
+}
 
 oop StringTable::lookup(jchar* name, int len) {
   unsigned int hash = hash_string(name, len);
   int index = the_table()->hash_to_index(hash);
-  return the_table()->lookup(index, name, len, hash);
+  oop string = the_table()->lookup(index, name, len, hash);
+
+  ensure_string_alive(string);
+
+  return string;
 }
 
 
@@ -719,7 +735,10 @@
   oop found_string = the_table()->lookup(index, name, len, hashValue);
 
   // Found
-  if (found_string != NULL) return found_string;
+  if (found_string != NULL) {
+    ensure_string_alive(found_string);
+    return found_string;
+  }
 
   debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
   assert(!Universe::heap()->is_in_reserved(name),
@@ -744,11 +763,17 @@
 
   // Grab the StringTable_lock before getting the_table() because it could
   // change at safepoint.
-  MutexLocker ml(StringTable_lock, THREAD);
+  oop added_or_found;
+  {
+    MutexLocker ml(StringTable_lock, THREAD);
+    // Otherwise, add to symbol to table
+    added_or_found = the_table()->basic_add(index, string, name, len,
+                                  hashValue, CHECK_NULL);
+  }
 
-  // Otherwise, add to symbol to table
-  return the_table()->basic_add(index, string, name, len,
-                                hashValue, CHECK_NULL);
+  ensure_string_alive(added_or_found);
+
+  return added_or_found;
 }
 
 oop StringTable::intern(Symbol* symbol, TRAPS) {
--- a/src/share/vm/classfile/systemDictionary.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/systemDictionary.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1613,13 +1613,7 @@
 // system dictionary and follows the remaining classes' contents.
 
 void SystemDictionary::always_strong_oops_do(OopClosure* blk) {
-  blk->do_oop(&_java_system_loader);
-  blk->do_oop(&_system_loader_lock_obj);
-
-  dictionary()->always_strong_oops_do(blk);
-
-  // Visit extra methods
-  invoke_method_table()->oops_do(blk);
+  roots_oops_do(blk, NULL);
 }
 
 void SystemDictionary::always_strong_classes_do(KlassClosure* closure) {
@@ -1686,6 +1680,17 @@
   return unloading_occurred;
 }
 
+void SystemDictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  strong->do_oop(&_java_system_loader);
+  strong->do_oop(&_system_loader_lock_obj);
+
+  // Adjust dictionary
+  dictionary()->roots_oops_do(strong, weak);
+
+  // Visit extra methods
+  invoke_method_table()->oops_do(strong);
+}
+
 void SystemDictionary::oops_do(OopClosure* f) {
   f->do_oop(&_java_system_loader);
   f->do_oop(&_system_loader_lock_obj);
--- a/src/share/vm/classfile/systemDictionary.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/systemDictionary.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -335,6 +335,7 @@
 
   // Applies "f->do_oop" to all root oops in the system dictionary.
   static void oops_do(OopClosure* f);
+  static void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   // System loader lock
   static oop system_loader_lock()           { return _system_loader_lock_obj; }
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -789,6 +789,26 @@
    do_name(     decrypt_name,                                      "decrypt")                                           \
    do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
+  /* support for sun.security.provider.SHA */                                                                           \
+  do_class(sun_security_provider_sha,                              "sun/security/provider/SHA")                         \
+  do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R)            \
+   do_name(     implCompress_name,                                 "implCompress")                                      \
+   do_signature(implCompress_signature,                            "([BI)V")                                            \
+                                                                                                                        \
+  /* support for sun.security.provider.SHA2 */                                                                          \
+  do_class(sun_security_provider_sha2,                             "sun/security/provider/SHA2")                        \
+  do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R)          \
+                                                                                                                        \
+  /* support for sun.security.provider.SHA5 */                                                                          \
+  do_class(sun_security_provider_sha5,                             "sun/security/provider/SHA5")                        \
+  do_intrinsic(_sha5_implCompress, sun_security_provider_sha5, implCompress_name, implCompress_signature, F_R)          \
+                                                                                                                        \
+  /* support for sun.security.provider.DigestBase */                                                                    \
+  do_class(sun_security_provider_digestbase,                       "sun/security/provider/DigestBase")                  \
+  do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R)   \
+   do_name(     implCompressMB_name,                               "implCompressMultiBlock")                            \
+   do_signature(implCompressMB_signature,                          "([BII)I")                                           \
+                                                                                                                        \
   /* support for java.util.zip */                                                                                       \
   do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
   do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
--- a/src/share/vm/code/codeCache.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/codeCache.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -337,6 +337,11 @@
 // Walk the list of methods which might contain non-perm oops.
 void CodeCache::scavenge_root_nmethods_do(CodeBlobClosure* f) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   debug_only(mark_scavenge_root_nmethods());
 
   for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -362,6 +367,11 @@
 
 void CodeCache::add_scavenge_root_nmethod(nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   nm->set_on_scavenge_root_list();
   nm->set_scavenge_root_link(_scavenge_root_nmethods);
   set_scavenge_root_nmethods(nm);
@@ -370,6 +380,11 @@
 
 void CodeCache::drop_scavenge_root_nmethod(nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   print_trace("drop_scavenge_root", nm);
   nmethod* last = NULL;
   nmethod* cur = scavenge_root_nmethods();
@@ -391,6 +406,11 @@
 
 void CodeCache::prune_scavenge_root_nmethods() {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   debug_only(mark_scavenge_root_nmethods());
 
   nmethod* last = NULL;
@@ -423,6 +443,10 @@
 
 #ifndef PRODUCT
 void CodeCache::asserted_non_scavengable_nmethods_do(CodeBlobClosure* f) {
+  if (UseG1GC) {
+    return;
+  }
+
   // While we are here, verify the integrity of the list.
   mark_scavenge_root_nmethods();
   for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -463,9 +487,36 @@
 }
 #endif //PRODUCT
 
+void CodeCache::verify_clean_inline_caches() {
+#ifdef ASSERT
+  FOR_ALL_ALIVE_BLOBS(cb) {
+    if (cb->is_nmethod()) {
+      nmethod* nm = (nmethod*)cb;
+      assert(!nm->is_unloaded(), "Tautology");
+      nm->verify_clean_inline_caches();
+      nm->verify();
+    }
+  }
+#endif
+}
+
+void CodeCache::verify_icholder_relocations() {
+#ifdef ASSERT
+  // make sure that we aren't leaking icholders
+  int count = 0;
+  FOR_ALL_BLOBS(cb) {
+    if (cb->is_nmethod()) {
+      nmethod* nm = (nmethod*)cb;
+      count += nm->verify_icholder_relocations();
+    }
+  }
+
+  assert(count + InlineCacheBuffer::pending_icholder_count() + CompiledICHolder::live_not_claimed_count() ==
+         CompiledICHolder::live_count(), "must agree");
+#endif
+}
 
 void CodeCache::gc_prologue() {
-  assert(!nmethod::oops_do_marking_is_active(), "oops_do_marking_epilogue must be called");
 }
 
 void CodeCache::gc_epilogue() {
@@ -478,41 +529,15 @@
         nm->cleanup_inline_caches();
       }
       DEBUG_ONLY(nm->verify());
-      nm->fix_oop_relocations();
+      DEBUG_ONLY(nm->verify_oop_relocations());
     }
   }
   set_needs_cache_clean(false);
   prune_scavenge_root_nmethods();
-  assert(!nmethod::oops_do_marking_is_active(), "oops_do_marking_prologue must be called");
 
-#ifdef ASSERT
-  // make sure that we aren't leaking icholders
-  int count = 0;
-  FOR_ALL_BLOBS(cb) {
-    if (cb->is_nmethod()) {
-      RelocIterator iter((nmethod*)cb);
-      while(iter.next()) {
-        if (iter.type() == relocInfo::virtual_call_type) {
-          if (CompiledIC::is_icholder_call_site(iter.virtual_call_reloc())) {
-            CompiledIC *ic = CompiledIC_at(iter.reloc());
-            if (TraceCompiledIC) {
-              tty->print("noticed icholder " INTPTR_FORMAT " ", p2i(ic->cached_icholder()));
-              ic->print();
-            }
-            assert(ic->cached_icholder() != NULL, "must be non-NULL");
-            count++;
-          }
-        }
-      }
-    }
-  }
-
-  assert(count + InlineCacheBuffer::pending_icholder_count() + CompiledICHolder::live_not_claimed_count() ==
-         CompiledICHolder::live_count(), "must agree");
-#endif
+  verify_icholder_relocations();
 }
 
-
 void CodeCache::verify_oops() {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   VerifyOopClosure voc;
--- a/src/share/vm/code/codeCache.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/codeCache.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -134,10 +134,6 @@
   // to) any unmarked codeBlobs in the cache.  Sets "marked_for_unloading"
   // to "true" iff some code got unloaded.
   static void do_unloading(BoolObjectClosure* is_alive, bool unloading_occurred);
-  static void oops_do(OopClosure* f) {
-    CodeBlobToOopClosure oopc(f, /*do_marking=*/ false);
-    blobs_do(&oopc);
-  }
   static void asserted_non_scavengable_nmethods_do(CodeBlobClosure* f = NULL) PRODUCT_RETURN;
   static void scavenge_root_nmethods_do(CodeBlobClosure* f);
 
@@ -172,6 +168,9 @@
   static void set_needs_cache_clean(bool v)      { _needs_cache_clean = v;    }
   static void clear_inline_caches();             // clear all inline caches
 
+  static void verify_clean_inline_caches();
+  static void verify_icholder_relocations();
+
   // Deoptimization
   static int  mark_for_deoptimization(DepChange& changes);
 #ifdef HOTSWAP
--- a/src/share/vm/code/compiledIC.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/compiledIC.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -99,13 +99,13 @@
   }
 
   {
-  MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+    MutexLockerEx pl(SafepointSynchronize::is_at_safepoint() ? NULL : Patching_lock, Mutex::_no_safepoint_check_flag);
 #ifdef ASSERT
-  CodeBlob* cb = CodeCache::find_blob_unsafe(_ic_call);
-  assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
+    CodeBlob* cb = CodeCache::find_blob_unsafe(_ic_call);
+    assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
 #endif
-  _ic_call->set_destination_mt_safe(entry_point);
-}
+     _ic_call->set_destination_mt_safe(entry_point);
+  }
 
   if (is_optimized() || is_icstub) {
     // Optimized call sites don't have a cache value and ICStub call
@@ -159,6 +159,50 @@
 //-----------------------------------------------------------------------------
 // High-level access to an inline cache. Guaranteed to be MT-safe.
 
+void CompiledIC::initialize_from_iter(RelocIterator* iter) {
+  assert(iter->addr() == _ic_call->instruction_address(), "must find ic_call");
+
+  if (iter->type() == relocInfo::virtual_call_type) {
+    virtual_call_Relocation* r = iter->virtual_call_reloc();
+    _is_optimized = false;
+    _value = nativeMovConstReg_at(r->cached_value());
+  } else {
+    assert(iter->type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
+    _is_optimized = true;
+    _value = NULL;
+  }
+}
+
+CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
+  : _ic_call(call)
+{
+  address ic_call = _ic_call->instruction_address();
+
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  // Search for the ic_call at the given address.
+  RelocIterator iter(nm, ic_call, ic_call+1);
+  bool ret = iter.next();
+  assert(ret == true, "relocInfo must exist at this address");
+  assert(iter.addr() == ic_call, "must find ic_call");
+
+  initialize_from_iter(&iter);
+}
+
+CompiledIC::CompiledIC(RelocIterator* iter)
+  : _ic_call(nativeCall_at(iter->addr()))
+{
+  address ic_call = _ic_call->instruction_address();
+
+  nmethod* nm = iter->code();
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  initialize_from_iter(iter);
+}
 
 bool CompiledIC::set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, TRAPS) {
   assert(CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "");
@@ -485,7 +529,7 @@
 void CompiledStaticCall::set_to_clean() {
   assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
   // Reset call site
-  MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+  MutexLockerEx pl(SafepointSynchronize::is_at_safepoint() ? NULL : Patching_lock, Mutex::_no_safepoint_check_flag);
 #ifdef ASSERT
   CodeBlob* cb = CodeCache::find_blob_unsafe(this);
   assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
--- a/src/share/vm/code/compiledIC.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/compiledIC.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -150,6 +150,9 @@
   bool          _is_optimized;  // an optimized virtual call (i.e., no compiled IC)
 
   CompiledIC(nmethod* nm, NativeCall* ic_call);
+  CompiledIC(RelocIterator* iter);
+
+  void initialize_from_iter(RelocIterator* iter);
 
   static bool is_icholder_entry(address entry);
 
@@ -183,6 +186,7 @@
   friend CompiledIC* CompiledIC_before(nmethod* nm, address return_addr);
   friend CompiledIC* CompiledIC_at(nmethod* nm, address call_site);
   friend CompiledIC* CompiledIC_at(Relocation* call_site);
+  friend CompiledIC* CompiledIC_at(RelocIterator* reloc_iter);
 
   // This is used to release CompiledICHolder*s from nmethods that
   // are about to be freed.  The callsite might contain other stale
@@ -263,6 +267,13 @@
   return c_ic;
 }
 
+inline CompiledIC* CompiledIC_at(RelocIterator* reloc_iter) {
+  assert(reloc_iter->type() == relocInfo::virtual_call_type ||
+      reloc_iter->type() == relocInfo::opt_virtual_call_type, "wrong reloc. info");
+  CompiledIC* c_ic = new CompiledIC(reloc_iter);
+  c_ic->verify();
+  return c_ic;
+}
 
 //-----------------------------------------------------------------------------
 // The CompiledStaticCall represents a call to a static method in the compiled
--- a/src/share/vm/code/dependencies.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/dependencies.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -407,56 +407,66 @@
 // for the sake of the compiler log, print out current dependencies:
 void Dependencies::log_all_dependencies() {
   if (log() == NULL)  return;
-  ciBaseObject* args[max_arg_count];
+  ResourceMark rm;
   for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
     DepType dept = (DepType)deptv;
     GrowableArray<ciBaseObject*>* deps = _deps[dept];
-    if (deps->length() == 0)  continue;
+    int deplen = deps->length();
+    if (deplen == 0) {
+      continue;
+    }
     int stride = dep_args(dept);
+    GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(stride);
     for (int i = 0; i < deps->length(); i += stride) {
       for (int j = 0; j < stride; j++) {
         // flush out the identities before printing
-        args[j] = deps->at(i+j);
+        ciargs->push(deps->at(i+j));
       }
-      write_dependency_to(log(), dept, stride, args);
+      write_dependency_to(log(), dept, ciargs);
+      ciargs->clear();
     }
+    guarantee(deplen == deps->length(), "deps array cannot grow inside nested ResoureMark scope");
   }
 }
 
 void Dependencies::write_dependency_to(CompileLog* log,
                                        DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,
                                        Klass* witness) {
   if (log == NULL) {
     return;
   }
+  ResourceMark rm;
   ciEnv* env = ciEnv::current();
-  ciBaseObject* ciargs[max_arg_count];
-  assert(nargs <= max_arg_count, "oob");
-  for (int j = 0; j < nargs; j++) {
-    if (args[j].is_oop()) {
-      ciargs[j] = env->get_object(args[j].oop_value());
+  GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(args->length());
+  for (GrowableArrayIterator<DepArgument> it = args->begin(); it != args->end(); ++it) {
+    DepArgument arg = *it;
+    if (arg.is_oop()) {
+      ciargs->push(env->get_object(arg.oop_value()));
     } else {
-      ciargs[j] = env->get_metadata(args[j].metadata_value());
+      ciargs->push(env->get_metadata(arg.metadata_value()));
     }
   }
-  Dependencies::write_dependency_to(log, dept, nargs, ciargs, witness);
+  int argslen = ciargs->length();
+  Dependencies::write_dependency_to(log, dept, ciargs, witness);
+  guarantee(argslen == ciargs->length(), "ciargs array cannot grow inside nested ResoureMark scope");
 }
 
 void Dependencies::write_dependency_to(CompileLog* log,
                                        DepType dept,
-                                       int nargs, ciBaseObject* args[],
+                                       GrowableArray<ciBaseObject*>* args,
                                        Klass* witness) {
-  if (log == NULL)  return;
-  assert(nargs <= max_arg_count, "oob");
-  int argids[max_arg_count];
-  int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  int j;
-  for (j = 0; j < nargs; j++) {
-    if (args[j]->is_object()) {
-      argids[j] = log->identify(args[j]->as_object());
+  if (log == NULL) {
+    return;
+  }
+  ResourceMark rm;
+  GrowableArray<int>* argids = new GrowableArray<int>(args->length());
+  for (GrowableArrayIterator<ciBaseObject*> it = args->begin(); it != args->end(); ++it) {
+    ciBaseObject* obj = *it;
+    if (obj->is_object()) {
+      argids->push(log->identify(obj->as_object()));
     } else {
-      argids[j] = log->identify(args[j]->as_metadata());
+      argids->push(log->identify(obj->as_metadata()));
     }
   }
   if (witness != NULL) {
@@ -465,16 +475,17 @@
     log->begin_elem("dependency");
   }
   log->print(" type='%s'", dep_name(dept));
-  if (ctxkj >= 0) {
-    log->print(" ctxk='%d'", argids[ctxkj]);
+  const int ctxkj = dep_context_arg(dept);  // -1 if no context arg
+  if (ctxkj >= 0 && ctxkj < argids->length()) {
+    log->print(" ctxk='%d'", argids->at(ctxkj));
   }
   // write remaining arguments, if any.
-  for (j = 0; j < nargs; j++) {
+  for (int j = 0; j < argids->length(); j++) {
     if (j == ctxkj)  continue;  // already logged
     if (j == 1) {
-      log->print(  " x='%d'",    argids[j]);
+      log->print(  " x='%d'",    argids->at(j));
     } else {
-      log->print(" x%d='%d'", j, argids[j]);
+      log->print(" x%d='%d'", j, argids->at(j));
     }
   }
   if (witness != NULL) {
@@ -486,9 +497,12 @@
 
 void Dependencies::write_dependency_to(xmlStream* xtty,
                                        DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,
                                        Klass* witness) {
-  if (xtty == NULL)  return;
+  if (xtty == NULL) {
+    return;
+  }
+  ResourceMark rm;
   ttyLocker ttyl;
   int ctxkj = dep_context_arg(dept);  // -1 if no context arg
   if (witness != NULL) {
@@ -498,23 +512,24 @@
   }
   xtty->print(" type='%s'", dep_name(dept));
   if (ctxkj >= 0) {
-    xtty->object("ctxk", args[ctxkj].metadata_value());
+    xtty->object("ctxk", args->at(ctxkj).metadata_value());
   }
   // write remaining arguments, if any.
-  for (int j = 0; j < nargs; j++) {
+  for (int j = 0; j < args->length(); j++) {
     if (j == ctxkj)  continue;  // already logged
+    DepArgument arg = args->at(j);
     if (j == 1) {
-      if (args[j].is_oop()) {
-        xtty->object("x", args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object("x", arg.oop_value());
       } else {
-        xtty->object("x", args[j].metadata_value());
+        xtty->object("x", arg.metadata_value());
       }
     } else {
       char xn[10]; sprintf(xn, "x%d", j);
-      if (args[j].is_oop()) {
-        xtty->object(xn, args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object(xn, arg.oop_value());
       } else {
-        xtty->object(xn, args[j].metadata_value());
+        xtty->object(xn, arg.metadata_value());
       }
     }
   }
@@ -525,7 +540,7 @@
   xtty->end_elem();
 }
 
-void Dependencies::print_dependency(DepType dept, int nargs, DepArgument args[],
+void Dependencies::print_dependency(DepType dept, GrowableArray<DepArgument>* args,
                                     Klass* witness) {
   ResourceMark rm;
   ttyLocker ttyl;   // keep the following output all in one block
@@ -534,8 +549,8 @@
                 dep_name(dept));
   // print arguments
   int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  for (int j = 0; j < nargs; j++) {
-    DepArgument arg = args[j];
+  for (int j = 0; j < args->length(); j++) {
+    DepArgument arg = args->at(j);
     bool put_star = false;
     if (arg.is_null())  continue;
     const char* what;
@@ -571,31 +586,33 @@
 void Dependencies::DepStream::log_dependency(Klass* witness) {
   if (_deps == NULL && xtty == NULL)  return;  // fast cutout for runtime
   ResourceMark rm;
-  int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  const int nargs = argument_count();
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
   for (int j = 0; j < nargs; j++) {
     if (type() == call_site_target_value) {
-      args[j] = argument_oop(j);
+      args->push(argument_oop(j));
     } else {
-      args[j] = argument(j);
+      args->push(argument(j));
     }
   }
+  int argslen = args->length();
   if (_deps != NULL && _deps->log() != NULL) {
-    Dependencies::write_dependency_to(_deps->log(),
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(_deps->log(), type(), args, witness);
   } else {
-    Dependencies::write_dependency_to(xtty,
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(xtty, type(), args, witness);
   }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResoureMark scope");
 }
 
 void Dependencies::DepStream::print_dependency(Klass* witness, bool verbose) {
+  ResourceMark rm;
   int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
   for (int j = 0; j < nargs; j++) {
-    args[j] = argument(j);
+    args->push(argument(j));
   }
-  Dependencies::print_dependency(type(), nargs, args, witness);
+  int argslen = args->length();
+  Dependencies::print_dependency(type(), args, witness);
   if (verbose) {
     if (_code != NULL) {
       tty->print("  code: ");
@@ -603,6 +620,7 @@
       tty->cr();
     }
   }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResoureMark scope");
 }
 
 
--- a/src/share/vm/code/dependencies.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/dependencies.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -368,20 +368,36 @@
   void copy_to(nmethod* nm);
 
   void log_all_dependencies();
-  void log_dependency(DepType dept, int nargs, ciBaseObject* args[]) {
-    write_dependency_to(log(), dept, nargs, args);
+
+  void log_dependency(DepType dept, GrowableArray<ciBaseObject*>* args) {
+    ResourceMark rm;
+    int argslen = args->length();
+    write_dependency_to(log(), dept, args);
+    guarantee(argslen == args->length(),
+              "args array cannot grow inside nested ResoureMark scope");
   }
+
   void log_dependency(DepType dept,
                       ciBaseObject* x0,
                       ciBaseObject* x1 = NULL,
                       ciBaseObject* x2 = NULL) {
-    if (log() == NULL)  return;
-    ciBaseObject* args[max_arg_count];
-    args[0] = x0;
-    args[1] = x1;
-    args[2] = x2;
-    assert(2 < max_arg_count, "");
-    log_dependency(dept, dep_args(dept), args);
+    if (log() == NULL) {
+      return;
+    }
+    ResourceMark rm;
+    GrowableArray<ciBaseObject*>* ciargs =
+                new GrowableArray<ciBaseObject*>(dep_args(dept));
+    assert (x0 != NULL, "no log x0");
+    ciargs->push(x0);
+
+    if (x1 != NULL) {
+      ciargs->push(x1);
+    }
+    if (x2 != NULL) {
+      ciargs->push(x2);
+    }
+    assert(ciargs->length() == dep_args(dept), "");
+    log_dependency(dept, ciargs);
   }
 
   class DepArgument : public ResourceObj {
@@ -404,20 +420,8 @@
     Metadata* metadata_value() const { assert(!_is_oop && _valid, "must be"); return (Metadata*) _value; }
   };
 
-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, ciBaseObject* args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(xmlStream* xtty,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
   static void print_dependency(DepType dept,
-                               int nargs, DepArgument args[],
+                               GrowableArray<DepArgument>* args,
                                Klass* witness = NULL);
 
  private:
@@ -426,6 +430,18 @@
 
   static Klass* ctxk_encoded_as_null(DepType dept, Metadata* x);
 
+  static void write_dependency_to(CompileLog* log,
+                                  DepType dept,
+                                  GrowableArray<ciBaseObject*>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(CompileLog* log,
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(xmlStream* xtty,
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
  public:
   // Use this to iterate over an nmethod's dependency set.
   // Works on new and old dependency sets.
--- a/src/share/vm/code/nmethod.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/nmethod.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -49,6 +49,8 @@
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
+unsigned char nmethod::_global_unloading_clock = 0;
+
 #ifdef DTRACE_ENABLED
 
 // Only bother with this argument setup if dtrace is available
@@ -384,27 +386,30 @@
   set_exception_cache(new_entry);
 }
 
-void nmethod::remove_from_exception_cache(ExceptionCache* ec) {
+void nmethod::clean_exception_cache(BoolObjectClosure* is_alive) {
   ExceptionCache* prev = NULL;
   ExceptionCache* curr = exception_cache();
-  assert(curr != NULL, "nothing to remove");
-  // find the previous and next entry of ec
-  while (curr != ec) {
-    prev = curr;
-    curr = curr->next();
-    assert(curr != NULL, "ExceptionCache not found");
+
+  while (curr != NULL) {
+    ExceptionCache* next = curr->next();
+
+    Klass* ex_klass = curr->exception_type();
+    if (ex_klass != NULL && !ex_klass->is_loader_alive(is_alive)) {
+      if (prev == NULL) {
+        set_exception_cache(next);
+      } else {
+        prev->set_next(next);
+      }
+      delete curr;
+      // prev stays the same.
+    } else {
+      prev = curr;
+    }
+
+    curr = next;
   }
-  // now: curr == ec
-  ExceptionCache* next = curr->next();
-  if (prev == NULL) {
-    set_exception_cache(next);
-  } else {
-    prev->set_next(next);
-  }
-  delete curr;
 }
 
-
 // public method for accessing the exception cache
 // These are the public access methods.
 address nmethod::handler_for_exception_and_pc(Handle exception, address pc) {
@@ -463,6 +468,7 @@
 // Fill in default values for various flag fields
 void nmethod::init_defaults() {
   _state                      = in_use;
+  _unloading_clock            = 0;
   _marked_for_reclamation     = 0;
   _has_flushed_dependencies   = 0;
   _has_unsafe_access          = 0;
@@ -481,7 +487,11 @@
   _oops_do_mark_link       = NULL;
   _jmethod_id              = NULL;
   _osr_link                = NULL;
-  _scavenge_root_link      = NULL;
+  if (UseG1GC) {
+    _unloading_next        = NULL;
+  } else {
+    _scavenge_root_link    = NULL;
+  }
   _scavenge_root_state     = 0;
   _compiler                = NULL;
 #if INCLUDE_RTM_OPT
@@ -1163,7 +1173,7 @@
     switch(iter.type()) {
       case relocInfo::virtual_call_type:
       case relocInfo::opt_virtual_call_type: {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        CompiledIC *ic = CompiledIC_at(&iter);
         // Ok, to lookup references to zombies here
         CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination());
         if( cb != NULL && cb->is_nmethod() ) {
@@ -1187,6 +1197,77 @@
   }
 }
 
+void nmethod::verify_clean_inline_caches() {
+  assert_locked_or_safepoint(CompiledIC_lock);
+
+  // If the method is not entrant or zombie then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (!is_in_use()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // This means that the low_boundary is going to be a little too high.
+    // This shouldn't matter, since oops of non-entrant methods are never used.
+    // In fact, why are we bothering to look at oops in a non-entrant method??
+  }
+
+  ResourceMark rm;
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+    switch(iter.type()) {
+      case relocInfo::virtual_call_type:
+      case relocInfo::opt_virtual_call_type: {
+        CompiledIC *ic = CompiledIC_at(&iter);
+        // Ok, to lookup references to zombies here
+        CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination());
+        if( cb != NULL && cb->is_nmethod() ) {
+          nmethod* nm = (nmethod*)cb;
+          // Verify that inline caches pointing to both zombie and not_entrant methods are clean
+          if (!nm->is_in_use() || (nm->method()->code() != nm)) {
+            assert(ic->is_clean(), "IC should be clean");
+          }
+        }
+        break;
+      }
+      case relocInfo::static_call_type: {
+        CompiledStaticCall *csc = compiledStaticCall_at(iter.reloc());
+        CodeBlob *cb = CodeCache::find_blob_unsafe(csc->destination());
+        if( cb != NULL && cb->is_nmethod() ) {
+          nmethod* nm = (nmethod*)cb;
+          // Verify that inline caches pointing to both zombie and not_entrant methods are clean
+          if (!nm->is_in_use() || (nm->method()->code() != nm)) {
+            assert(csc->is_clean(), "IC should be clean");
+          }
+        }
+        break;
+      }
+    }
+  }
+}
+
+int nmethod::verify_icholder_relocations() {
+  int count = 0;
+
+  RelocIterator iter(this);
+  while(iter.next()) {
+    if (iter.type() == relocInfo::virtual_call_type) {
+      if (CompiledIC::is_icholder_call_site(iter.virtual_call_reloc())) {
+        CompiledIC *ic = CompiledIC_at(&iter);
+        if (TraceCompiledIC) {
+          tty->print("noticed icholder " INTPTR_FORMAT " ", p2i(ic->cached_icholder()));
+          ic->print();
+        }
+        assert(ic->cached_icholder() != NULL, "must be non-NULL");
+        count++;
+      }
+    }
+  }
+
+  return count;
+}
+
 // This is a private interface with the sweeper.
 void nmethod::mark_as_seen_on_stack() {
   assert(is_alive(), "Must be an alive method");
@@ -1219,6 +1300,23 @@
   mdo->inc_decompile_count();
 }
 
+void nmethod::increase_unloading_clock() {
+  _global_unloading_clock++;
+  if (_global_unloading_clock == 0) {
+    // _nmethods are allocated with _unloading_clock == 0,
+    // so 0 is never used as a clock value.
+    _global_unloading_clock = 1;
+  }
+}
+
+void nmethod::set_unloading_clock(unsigned char unloading_clock) {
+  OrderAccess::release_store((volatile jubyte*)&_unloading_clock, unloading_clock);
+}
+
+unsigned char nmethod::unloading_clock() {
+  return (unsigned char)OrderAccess::load_acquire((volatile jubyte*)&_unloading_clock);
+}
+
 void nmethod::make_unloaded(BoolObjectClosure* is_alive, oop cause) {
 
   post_compiled_method_unload();
@@ -1264,6 +1362,10 @@
     // for later on.
     CodeCache::set_needs_cache_clean(true);
   }
+
+  // Unregister must be done before the state change
+  Universe::heap()->unregister_nmethod(this);
+
   _state = unloaded;
 
   // Log the unloading.
@@ -1618,6 +1720,35 @@
   set_unload_reported();
 }
 
+void static clean_ic_if_metadata_is_dead(CompiledIC *ic, BoolObjectClosure *is_alive) {
+  if (ic->is_icholder_call()) {
+    // The only exception is compiledICHolder oops which may
+    // yet be marked below. (We check this further below).
+    CompiledICHolder* cichk_oop = ic->cached_icholder();
+    if (cichk_oop->holder_method()->method_holder()->is_loader_alive(is_alive) &&
+        cichk_oop->holder_klass()->is_loader_alive(is_alive)) {
+      return;
+    }
+  } else {
+    Metadata* ic_oop = ic->cached_metadata();
+    if (ic_oop != NULL) {
+      if (ic_oop->is_klass()) {
+        if (((Klass*)ic_oop)->is_loader_alive(is_alive)) {
+          return;
+        }
+      } else if (ic_oop->is_method()) {
+        if (((Method*)ic_oop)->method_holder()->is_loader_alive(is_alive)) {
+          return;
+        }
+      } else {
+        ShouldNotReachHere();
+      }
+    }
+  }
+
+  ic->set_to_clean();
+}
+
 // This is called at the end of the strong tracing/marking phase of a
 // GC to unload an nmethod if it contains otherwise unreachable
 // oops.
@@ -1650,15 +1781,7 @@
   }
 
   // Exception cache
-  ExceptionCache* ec = exception_cache();
-  while (ec != NULL) {
-    Klass* ex_klass = ec->exception_type();
-    ExceptionCache* next_ec = ec->next();
-    if (ex_klass != NULL && !ex_klass->is_loader_alive(is_alive)) {
-      remove_from_exception_cache(ec);
-    }
-    ec = next_ec;
-  }
+  clean_exception_cache(is_alive);
 
   // If class unloading occurred we first iterate over all inline caches and
   // clear ICs where the cached oop is referring to an unloaded klass or method.
@@ -1668,32 +1791,8 @@
     RelocIterator iter(this, low_boundary);
     while(iter.next()) {
       if (iter.type() == relocInfo::virtual_call_type) {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
-        if (ic->is_icholder_call()) {
-          // The only exception is compiledICHolder oops which may
-          // yet be marked below. (We check this further below).
-          CompiledICHolder* cichk_oop = ic->cached_icholder();
-          if (cichk_oop->holder_method()->method_holder()->is_loader_alive(is_alive) &&
-              cichk_oop->holder_klass()->is_loader_alive(is_alive)) {
-            continue;
-          }
-        } else {
-          Metadata* ic_oop = ic->cached_metadata();
-          if (ic_oop != NULL) {
-            if (ic_oop->is_klass()) {
-              if (((Klass*)ic_oop)->is_loader_alive(is_alive)) {
-                continue;
-              }
-            } else if (ic_oop->is_method()) {
-              if (((Method*)ic_oop)->method_holder()->is_loader_alive(is_alive)) {
-                continue;
-              }
-            } else {
-              ShouldNotReachHere();
-            }
-          }
-        }
-        ic->set_to_clean();
+        CompiledIC *ic = CompiledIC_at(&iter);
+        clean_ic_if_metadata_is_dead(ic, is_alive);
       }
     }
   }
@@ -1731,6 +1830,175 @@
   verify_metadata_loaders(low_boundary, is_alive);
 }
 
+template <class CompiledICorStaticCall>
+static bool clean_if_nmethod_is_unloaded(CompiledICorStaticCall *ic, address addr, BoolObjectClosure *is_alive, nmethod* from) {
+  // Ok, to lookup references to zombies here
+  CodeBlob *cb = CodeCache::find_blob_unsafe(addr);
+  if (cb != NULL && cb->is_nmethod()) {
+    nmethod* nm = (nmethod*)cb;
+
+    if (nm->unloading_clock() != nmethod::global_unloading_clock()) {
+      // The nmethod has not been processed yet.
+      return true;
+    }
+
+    // Clean inline caches pointing to both zombie and not_entrant methods
+    if (!nm->is_in_use() || (nm->method()->code() != nm)) {
+      ic->set_to_clean();
+      assert(ic->is_clean(), err_msg("nmethod " PTR_FORMAT "not clean %s", from, from->method()->name_and_sig_as_C_string()));
+    }
+  }
+
+  return false;
+}
+
+static bool clean_if_nmethod_is_unloaded(CompiledIC *ic, BoolObjectClosure *is_alive, nmethod* from) {
+  return clean_if_nmethod_is_unloaded(ic, ic->ic_destination(), is_alive, from);
+}
+
+static bool clean_if_nmethod_is_unloaded(CompiledStaticCall *csc, BoolObjectClosure *is_alive, nmethod* from) {
+  return clean_if_nmethod_is_unloaded(csc, csc->destination(), is_alive, from);
+}
+
+bool nmethod::do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred) {
+  ResourceMark rm;
+
+  // Make sure the oop's ready to receive visitors
+  assert(!is_zombie() && !is_unloaded(),
+         "should not call follow on zombie or unloaded nmethod");
+
+  // If the method is not entrant then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (is_not_entrant()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // (See comment above.)
+  }
+
+  // The RedefineClasses() API can cause the class unloading invariant
+  // to no longer be true. See jvmtiExport.hpp for details.
+  // Also, leave a debugging breadcrumb in local flag.
+  bool a_class_was_redefined = JvmtiExport::has_redefined_a_class();
+  if (a_class_was_redefined) {
+    // This set of the unloading_occurred flag is done before the
+    // call to post_compiled_method_unload() so that the unloading
+    // of this nmethod is reported.
+    unloading_occurred = true;
+  }
+
+  // Exception cache
+  clean_exception_cache(is_alive);
+
+  bool is_unloaded = false;
+  bool postponed = false;
+
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+
+    switch (iter.type()) {
+
+    case relocInfo::virtual_call_type:
+      if (unloading_occurred) {
+        // If class unloading occurred we first iterate over all inline caches and
+        // clear ICs where the cached oop is referring to an unloaded klass or method.
+        clean_ic_if_metadata_is_dead(CompiledIC_at(&iter), is_alive);
+      }
+
+      postponed |= clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::opt_virtual_call_type:
+      postponed |= clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::static_call_type:
+      postponed |= clean_if_nmethod_is_unloaded(compiledStaticCall_at(iter.reloc()), is_alive, this);
+      break;
+
+    case relocInfo::oop_type:
+      if (!is_unloaded) {
+        // Unload check
+        oop_Relocation* r = iter.oop_reloc();
+        // Traverse those oops directly embedded in the code.
+        // Other oops (oop_index>0) are seen as part of scopes_oops.
+        assert(1 == (r->oop_is_immediate()) +
+                  (r->oop_addr() >= oops_begin() && r->oop_addr() < oops_end()),
+              "oop must be found in exactly one place");
+        if (r->oop_is_immediate() && r->oop_value() != NULL) {
+          if (can_unload(is_alive, r->oop_addr(), unloading_occurred)) {
+            is_unloaded = true;
+          }
+        }
+      }
+      break;
+
+    }
+  }
+
+  if (is_unloaded) {
+    return postponed;
+  }
+
+  // Scopes
+  for (oop* p = oops_begin(); p < oops_end(); p++) {
+    if (*p == Universe::non_oop_word())  continue;  // skip non-oops
+    if (can_unload(is_alive, p, unloading_occurred)) {
+      is_unloaded = true;
+      break;
+    }
+  }
+
+  if (is_unloaded) {
+    return postponed;
+  }
+
+  // Ensure that all metadata is still alive
+  verify_metadata_loaders(low_boundary, is_alive);
+
+  return postponed;
+}
+
+void nmethod::do_unloading_parallel_postponed(BoolObjectClosure* is_alive, bool unloading_occurred) {
+  ResourceMark rm;
+
+  // Make sure the oop's ready to receive visitors
+  assert(!is_zombie(),
+         "should not call follow on zombie nmethod");
+
+  // If the method is not entrant then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (is_not_entrant()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // (See comment above.)
+  }
+
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+
+    switch (iter.type()) {
+
+    case relocInfo::virtual_call_type:
+      clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::opt_virtual_call_type:
+      clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::static_call_type:
+      clean_if_nmethod_is_unloaded(compiledStaticCall_at(iter.reloc()), is_alive, this);
+      break;
+    }
+  }
+}
+
 #ifdef ASSERT
 
 class CheckClass : AllStatic {
@@ -1777,7 +2045,7 @@
     // compiled code is maintaining a link to dead metadata.
     address static_call_addr = NULL;
     if (iter.type() == relocInfo::opt_virtual_call_type) {
-      CompiledIC* cic = CompiledIC_at(iter.reloc());
+      CompiledIC* cic = CompiledIC_at(&iter);
       if (!cic->is_call_to_interpreted()) {
         static_call_addr = iter.addr();
       }
@@ -1829,7 +2097,7 @@
         }
       } else if (iter.type() == relocInfo::virtual_call_type) {
         // Check compiledIC holders associated with this nmethod
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        CompiledIC *ic = CompiledIC_at(&iter);
         if (ic->is_icholder_call()) {
           CompiledICHolder* cichk = ic->cached_icholder();
           f(cichk->holder_method());
@@ -1947,7 +2215,7 @@
     assert(cur != NULL, "not NULL-terminated");
     nmethod* next = cur->_oops_do_mark_link;
     cur->_oops_do_mark_link = NULL;
-    cur->fix_oop_relocations();
+    cur->verify_oop_relocations();
     NOT_PRODUCT(if (TraceScavenge)  cur->print_on(tty, "oops_do, unmark"));
     cur = next;
   }
@@ -2489,6 +2757,10 @@
 };
 
 void nmethod::verify_scavenge_root_oops() {
+  if (UseG1GC) {
+    return;
+  }
+
   if (!on_scavenge_root_list()) {
     // Actually look inside, to verify the claim that it's clean.
     DebugScavengeRoot debug_scavenge_root(this);
@@ -2932,7 +3204,7 @@
     case relocInfo::virtual_call_type:
     case relocInfo::opt_virtual_call_type: {
       VerifyMutexLocker mc(CompiledIC_lock);
-      CompiledIC_at(iter.reloc())->print();
+      CompiledIC_at(&iter)->print();
       break;
     }
     case relocInfo::static_call_type:
--- a/src/share/vm/code/nmethod.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/nmethod.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -116,6 +116,11 @@
   friend class NMethodSweeper;
   friend class CodeCache;  // scavengable oops
  private:
+
+  // GC support to help figure out if an nmethod has been
+  // cleaned/unloaded by the current GC.
+  static unsigned char _global_unloading_clock;
+
   // Shared fields for all nmethod's
   Method*   _method;
   int       _entry_bci;        // != InvocationEntryBci if this nmethod is an on-stack replacement method
@@ -123,7 +128,13 @@
 
   // To support simple linked-list chaining of nmethods:
   nmethod*  _osr_link;         // from InstanceKlass::osr_nmethods_head
-  nmethod*  _scavenge_root_link; // from CodeCache::scavenge_root_nmethods
+
+  union {
+    // Used by G1 to chain nmethods.
+    nmethod* _unloading_next;
+    // Used by non-G1 GCs to chain nmethods.
+    nmethod* _scavenge_root_link; // from CodeCache::scavenge_root_nmethods
+  };
 
   static nmethod* volatile _oops_do_mark_nmethods;
   nmethod*        volatile _oops_do_mark_link;
@@ -185,6 +196,8 @@
   // Protected by Patching_lock
   volatile unsigned char _state;             // {alive, not_entrant, zombie, unloaded}
 
+  volatile unsigned char _unloading_clock;   // Incremented after GC unloaded/cleaned the nmethod
+
 #ifdef ASSERT
   bool _oops_are_stale;  // indicates that it's no longer safe to access oops section
 #endif
@@ -442,6 +455,15 @@
   bool  unload_reported()                         { return _unload_reported; }
   void  set_unload_reported()                     { _unload_reported = true; }
 
+  void set_unloading_next(nmethod* next)          { _unloading_next = next; }
+  nmethod* unloading_next()                       { return _unloading_next; }
+
+  static unsigned char global_unloading_clock()   { return _global_unloading_clock; }
+  static void increase_unloading_clock();
+
+  void set_unloading_clock(unsigned char unloading_clock);
+  unsigned char unloading_clock();
+
   bool  is_marked_for_deoptimization() const      { return _marked_for_deoptimization; }
   void  mark_for_deoptimization()                 { _marked_for_deoptimization = true; }
 
@@ -534,7 +556,7 @@
   void set_exception_cache(ExceptionCache *ec)    { _exception_cache = ec; }
   address handler_for_exception_and_pc(Handle exception, address pc);
   void add_handler_for_exception_and_pc(Handle exception, address pc, address handler);
-  void remove_from_exception_cache(ExceptionCache* ec);
+  void clean_exception_cache(BoolObjectClosure* is_alive);
 
   // implicit exceptions support
   address continuation_for_implicit_exception(address pc);
@@ -557,6 +579,10 @@
     return (addr >= code_begin() && addr < verified_entry_point());
   }
 
+  // Verify calls to dead methods have been cleaned.
+  void verify_clean_inline_caches();
+  // Verify and count cached icholder relocations.
+  int  verify_icholder_relocations();
   // Check that all metadata is still alive
   void verify_metadata_loaders(address low_boundary, BoolObjectClosure* is_alive);
 
@@ -582,6 +608,10 @@
 
   // GC support
   void do_unloading(BoolObjectClosure* is_alive, bool unloading_occurred);
+  //  The parallel versions are used by G1.
+  bool do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred);
+  void do_unloading_parallel_postponed(BoolObjectClosure* is_alive, bool unloading_occurred);
+  //  Unload a nmethod if the *root object is dead.
   bool can_unload(BoolObjectClosure* is_alive, oop* root, bool unloading_occurred);
 
   void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map,
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CMSOOPCLOSURES_HPP
 
 #include "memory/genOopClosures.hpp"
+#include "memory/iterator.hpp"
 
 /////////////////////////////////////////////////////////////////
 // Closures used by ConcurrentMarkSweepGeneration's collector
@@ -48,33 +49,13 @@
     }                                                     \
   }
 
-// Applies the given oop closure to all oops in all klasses visited.
-class CMKlassClosure : public KlassClosure {
-  friend class CMSOopClosure;
-  friend class CMSOopsInGenClosure;
-
-  OopClosure* _oop_closure;
-
-  // Used when _oop_closure couldn't be set in an initialization list.
-  void initialize(OopClosure* oop_closure) {
-    assert(_oop_closure == NULL, "Should only be called once");
-    _oop_closure = oop_closure;
-  }
+// TODO: This duplication of the MetadataAwareOopClosure class is only needed
+//       because some CMS OopClosures derive from OopsInGenClosure. It would be
+//       good to get rid of them completely.
+class MetadataAwareOopsInGenClosure: public OopsInGenClosure {
+  KlassToOopClosure _klass_closure;
  public:
-  CMKlassClosure(OopClosure* oop_closure = NULL) : _oop_closure(oop_closure) { }
-
-  void do_klass(Klass* k);
-};
-
-// The base class for all CMS marking closures.
-// It's used to proxy through the metadata to the oops defined in them.
-class CMSOopClosure: public ExtendedOopClosure {
-  CMKlassClosure      _klass_closure;
- public:
-  CMSOopClosure() : ExtendedOopClosure() {
-    _klass_closure.initialize(this);
-  }
-  CMSOopClosure(ReferenceProcessor* rp) : ExtendedOopClosure(rp) {
+  MetadataAwareOopsInGenClosure() {
     _klass_closure.initialize(this);
   }
 
@@ -87,26 +68,7 @@
   virtual void do_class_loader_data(ClassLoaderData* cld);
 };
 
-// TODO: This duplication of the CMSOopClosure class is only needed because
-//       some CMS OopClosures derive from OopsInGenClosure. It would be good
-//       to get rid of them completely.
-class CMSOopsInGenClosure: public OopsInGenClosure {
-  CMKlassClosure _klass_closure;
- public:
-  CMSOopsInGenClosure() {
-    _klass_closure.initialize(this);
-  }
-
-  virtual bool do_metadata()    { return do_metadata_nv(); }
-  inline  bool do_metadata_nv() { return true; }
-
-  virtual void do_klass(Klass* k);
-  void do_klass_nv(Klass* k);
-
-  virtual void do_class_loader_data(ClassLoaderData* cld);
-};
-
-class MarkRefsIntoClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _bitMap;
@@ -122,7 +84,7 @@
   }
 };
 
-class Par_MarkRefsIntoClosure: public CMSOopsInGenClosure {
+class Par_MarkRefsIntoClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _bitMap;
@@ -140,7 +102,7 @@
 
 // A variant of the above used in certain kinds of CMS
 // marking verification.
-class MarkRefsIntoVerifyClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoVerifyClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _verification_bm;
@@ -159,7 +121,7 @@
 };
 
 // The non-parallel version (the parallel version appears further below).
-class PushAndMarkClosure: public CMSOopClosure {
+class PushAndMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -193,7 +155,7 @@
 // synchronization (for instance, via CAS). The marking stack
 // used in the non-parallel case above is here replaced with
 // an OopTaskQueue structure to allow efficient work stealing.
-class Par_PushAndMarkClosure: public CMSOopClosure {
+class Par_PushAndMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -218,7 +180,7 @@
 };
 
 // The non-parallel version (the parallel version appears further below).
-class MarkRefsIntoAndScanClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoAndScanClosure: public MetadataAwareOopsInGenClosure {
  private:
   MemRegion          _span;
   CMSBitMap*         _bit_map;
@@ -262,7 +224,7 @@
 // stack and the bitMap are shared, so access needs to be suitably
 // sycnhronized. An OopTaskQueue structure, supporting efficient
 // workstealing, replaces a CMSMarkStack for storing grey objects.
-class Par_MarkRefsIntoAndScanClosure: public CMSOopsInGenClosure {
+class Par_MarkRefsIntoAndScanClosure: public MetadataAwareOopsInGenClosure {
  private:
   MemRegion              _span;
   CMSBitMap*             _bit_map;
@@ -291,7 +253,7 @@
 // This closure is used during the concurrent marking phase
 // following the first checkpoint. Its use is buried in
 // the closure MarkFromRootsClosure.
-class PushOrMarkClosure: public CMSOopClosure {
+class PushOrMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector*   _collector;
   MemRegion       _span;
@@ -324,7 +286,7 @@
 // This closure is used during the concurrent marking phase
 // following the first checkpoint. Its use is buried in
 // the closure Par_MarkFromRootsClosure.
-class Par_PushOrMarkClosure: public CMSOopClosure {
+class Par_PushOrMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector*    _collector;
   MemRegion        _whole_span;
@@ -364,7 +326,7 @@
 // processing phase of the CMS final checkpoint step, as
 // well as during the concurrent precleaning of the discovered
 // reference lists.
-class CMSKeepAliveClosure: public CMSOopClosure {
+class CMSKeepAliveClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   const MemRegion _span;
@@ -384,7 +346,7 @@
   inline void do_oop_nv(narrowOop* p) { CMSKeepAliveClosure::do_oop_work(p); }
 };
 
-class CMSInnerParMarkAndPushClosure: public CMSOopClosure {
+class CMSInnerParMarkAndPushClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -405,7 +367,7 @@
 // A parallel (MT) version of the above, used when
 // reference processing is parallel; the only difference
 // is in the do_oop method.
-class CMSParKeepAliveClosure: public CMSOopClosure {
+class CMSParKeepAliveClosure: public MetadataAwareOopClosure {
  private:
   MemRegion     _span;
   OopTaskQueue* _work_queue;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -44,33 +44,20 @@
   }
 }
 
-// CMSOopClosure and CMSoopsInGenClosure are duplicated,
+// MetadataAwareOopClosure and MetadataAwareOopsInGenClosure are duplicated,
 // until we get rid of OopsInGenClosure.
 
-inline void CMSOopClosure::do_klass(Klass* k)       { do_klass_nv(k); }
-inline void CMSOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
-
-inline void CMSOopClosure::do_klass_nv(Klass* k) {
+inline void MetadataAwareOopsInGenClosure::do_klass_nv(Klass* k) {
   ClassLoaderData* cld = k->class_loader_data();
   do_class_loader_data(cld);
 }
-inline void CMSOopsInGenClosure::do_klass_nv(Klass* k) {
-  ClassLoaderData* cld = k->class_loader_data();
-  do_class_loader_data(cld);
-}
+inline void MetadataAwareOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
 
-inline void CMSOopClosure::do_class_loader_data(ClassLoaderData* cld) {
-  assert(_klass_closure._oop_closure == this, "Must be");
-
-  bool claim = true;  // Must claim the class loader data before processing.
-  cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
-}
-inline void CMSOopsInGenClosure::do_class_loader_data(ClassLoaderData* cld) {
+inline void MetadataAwareOopsInGenClosure::do_class_loader_data(ClassLoaderData* cld) {
   assert(_klass_closure._oop_closure == this, "Must be");
 
   bool claim = true;  // Must claim the class loader data before processing.
   cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
 }
 
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CMSOOPCLOSURES_INLINE_HPP
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -795,53 +795,6 @@
   }
 }
 
-// Apply the given closure to each oop in the space \intersect memory region.
-void CompactibleFreeListSpace::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  assert_lock_strong(freelistLock());
-  if (is_empty()) {
-    return;
-  }
-  MemRegion cur = MemRegion(bottom(), end());
-  mr = mr.intersection(cur);
-  if (mr.is_empty()) {
-    return;
-  }
-  if (mr.equals(cur)) {
-    oop_iterate(cl);
-    return;
-  }
-  assert(mr.end() <= end(), "just took an intersection above");
-  HeapWord* obj_addr = block_start(mr.start());
-  HeapWord* t = mr.end();
-
-  SpaceMemRegionOopsIterClosure smr_blk(cl, mr);
-  if (block_is_obj(obj_addr)) {
-    // Handle first object specially.
-    oop obj = oop(obj_addr);
-    obj_addr += adjustObjectSize(obj->oop_iterate(&smr_blk));
-  } else {
-    FreeChunk* fc = (FreeChunk*)obj_addr;
-    obj_addr += fc->size();
-  }
-  while (obj_addr < t) {
-    HeapWord* obj = obj_addr;
-    obj_addr += block_size(obj_addr);
-    // If "obj_addr" is not greater than top, then the
-    // entire object "obj" is within the region.
-    if (obj_addr <= t) {
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(cl);
-      }
-    } else {
-      // "obj" extends beyond end of region
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(&smr_blk);
-      }
-      break;
-    }
-  }
-}
-
 // NOTE: In the following methods, in order to safely be able to
 // apply the closure to an object, we need to be sure that the
 // object has been initialized. We are guaranteed that an object
@@ -900,42 +853,60 @@
                                                   UpwardsObjectClosure* cl) {
   assert_locked(freelistLock());
   NOT_PRODUCT(verify_objects_initialized());
-  Space::object_iterate_mem(mr, cl);
+  assert(!mr.is_empty(), "Should be non-empty");
+  // We use MemRegion(bottom(), end()) rather than used_region() below
+  // because the two are not necessarily equal for some kinds of
+  // spaces, in particular, certain kinds of free list spaces.
+  // We could use the more complicated but more precise:
+  // MemRegion(used_region().start(), round_to(used_region().end(), CardSize))
+  // but the slight imprecision seems acceptable in the assertion check.
+  assert(MemRegion(bottom(), end()).contains(mr),
+         "Should be within used space");
+  HeapWord* prev = cl->previous();   // max address from last time
+  if (prev >= mr.end()) { // nothing to do
+    return;
+  }
+  // This assert will not work when we go from cms space to perm
+  // space, and use same closure. Easy fix deferred for later. XXX YSR
+  // assert(prev == NULL || contains(prev), "Should be within space");
+
+  bool last_was_obj_array = false;
+  HeapWord *blk_start_addr, *region_start_addr;
+  if (prev > mr.start()) {
+    region_start_addr = prev;
+    blk_start_addr    = prev;
+    // The previous invocation may have pushed "prev" beyond the
+    // last allocated block yet there may be still be blocks
+    // in this region due to a particular coalescing policy.
+    // Relax the assertion so that the case where the unallocated
+    // block is maintained and "prev" is beyond the unallocated
+    // block does not cause the assertion to fire.
+    assert((BlockOffsetArrayUseUnallocatedBlock &&
+            (!is_in(prev))) ||
+           (blk_start_addr == block_start(region_start_addr)), "invariant");
+  } else {
+    region_start_addr = mr.start();
+    blk_start_addr    = block_start(region_start_addr);
+  }
+  HeapWord* region_end_addr = mr.end();
+  MemRegion derived_mr(region_start_addr, region_end_addr);
+  while (blk_start_addr < region_end_addr) {
+    const size_t size = block_size(blk_start_addr);
+    if (block_is_obj(blk_start_addr)) {
+      last_was_obj_array = cl->do_object_bm(oop(blk_start_addr), derived_mr);
+    } else {
+      last_was_obj_array = false;
+    }
+    blk_start_addr += size;
+  }
+  if (!last_was_obj_array) {
+    assert((bottom() <= blk_start_addr) && (blk_start_addr <= end()),
+           "Should be within (closed) used space");
+    assert(blk_start_addr > prev, "Invariant");
+    cl->set_previous(blk_start_addr); // min address for next time
+  }
 }
 
-// Callers of this iterator beware: The closure application should
-// be robust in the face of uninitialized objects and should (always)
-// return a correct size so that the next addr + size below gives us a
-// valid block boundary. [See for instance,
-// ScanMarkedObjectsAgainCarefullyClosure::do_object_careful()
-// in ConcurrentMarkSweepGeneration.cpp.]
-HeapWord*
-CompactibleFreeListSpace::object_iterate_careful(ObjectClosureCareful* cl) {
-  assert_lock_strong(freelistLock());
-  HeapWord *addr, *last;
-  size_t size;
-  for (addr = bottom(), last  = end();
-       addr < last; addr += size) {
-    FreeChunk* fc = (FreeChunk*)addr;
-    if (fc->is_free()) {
-      // Since we hold the free list lock, which protects direct
-      // allocation in this generation by mutators, a free object
-      // will remain free throughout this iteration code.
-      size = fc->size();
-    } else {
-      // Note that the object need not necessarily be initialized,
-      // because (for instance) the free list lock does NOT protect
-      // object initialization. The closure application below must
-      // therefore be correct in the face of uninitialized objects.
-      size = cl->do_object_careful(oop(addr));
-      if (size == 0) {
-        // An unparsable object found. Signal early termination.
-        return addr;
-      }
-    }
-  }
-  return NULL;
-}
 
 // Callers of this iterator beware: The closure application should
 // be robust in the face of uninitialized objects and should (always)
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -337,10 +337,6 @@
                      unallocated_block() : end());
   }
 
-  bool is_in(const void* p) const {
-    return used_region().contains(p);
-  }
-
   virtual bool is_free_block(const HeapWord* p) const;
 
   // Resizing support
@@ -350,7 +346,6 @@
   Mutex* freelistLock() const { return &_freelistLock; }
 
   // Iteration support
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void oop_iterate(ExtendedOopClosure* cl);
 
   void object_iterate(ObjectClosure* blk);
@@ -363,6 +358,12 @@
   // obj_is_alive() to determine whether it is safe to iterate of
   // an object.
   void safe_object_iterate(ObjectClosure* blk);
+
+  // Iterate over all objects that intersect with mr, calling "cl->do_object"
+  // on each.  There is an exception to this: if this closure has already
+  // been invoked on an object, it may skip such objects in some cases.  This is
+  // Most likely to happen in an "upwards" (ascending address) iteration of
+  // MemRegions.
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
 
   // Requires that "mr" be entirely within the space.
@@ -371,11 +372,8 @@
   // terminate the iteration and return the address of the start of the
   // subregion that isn't done.  Return of "NULL" indicates that the
   // interation completed.
-  virtual HeapWord*
-       object_iterate_careful_m(MemRegion mr,
-                                ObjectClosureCareful* cl);
-  virtual HeapWord*
-       object_iterate_careful(ObjectClosureCareful* cl);
+ HeapWord* object_iterate_careful_m(MemRegion mr,
+                                    ObjectClosureCareful* cl);
 
   // Override: provides a DCTO_CL specific to this kind of space.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -49,7 +49,7 @@
 #include "memory/genCollectedHeap.hpp"
 #include "memory/genMarkSweep.hpp"
 #include "memory/genOopClosures.inline.hpp"
-#include "memory/iterator.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/padded.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/resourceArea.hpp"
@@ -1570,11 +1570,11 @@
   }
 
   if (MetaspaceGC::should_concurrent_collect()) {
-      if (Verbose && PrintGCDetails) {
+    if (Verbose && PrintGCDetails) {
       gclog_or_tty->print("CMSCollector: collect for metadata allocation ");
-      }
-      return true;
-    }
+    }
+    return true;
+  }
 
   return false;
 }
@@ -3028,22 +3028,21 @@
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  // Get a clear set of claim bits for the strong roots processing to work with.
+  // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
   // Mark from roots one level into CMS
   MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
-  gch->gen_process_strong_roots(_cmsGen->level(),
-                                true,   // younger gens are roots
-                                true,   // activate StrongRootsScope
-                                false,  // not scavenging
-                                SharedHeap::ScanningOption(roots_scanning_options()),
-                                &notOlder,
-                                true,   // walk code active on stacks
-                                NULL,
-                                NULL); // SSS: Provide correct closure
+  gch->gen_process_roots(_cmsGen->level(),
+                         true,   // younger gens are roots
+                         true,   // activate StrongRootsScope
+                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         should_unload_classes(),
+                         &notOlder,
+                         NULL,
+                         NULL);  // SSS: Provide correct closure
 
   // Now mark from the roots
   MarkFromRootsClosure markFromRootsClosure(this, _span,
@@ -3094,24 +3093,24 @@
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  // Get a clear set of claim bits for the strong roots processing to work with.
+  // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
   // Mark from roots one level into CMS
   MarkRefsIntoVerifyClosure notOlder(_span, verification_mark_bm(),
                                      markBitMap());
-  CMKlassClosure klass_closure(&notOlder);
+  CLDToOopClosure cld_closure(&notOlder, true);
 
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-  gch->gen_process_strong_roots(_cmsGen->level(),
-                                true,   // younger gens are roots
-                                true,   // activate StrongRootsScope
-                                false,  // not scavenging
-                                SharedHeap::ScanningOption(roots_scanning_options()),
-                                &notOlder,
-                                true,   // walk code active on stacks
-                                NULL,
-                                &klass_closure);
+
+  gch->gen_process_roots(_cmsGen->level(),
+                         true,   // younger gens are roots
+                         true,   // activate StrongRootsScope
+                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         should_unload_classes(),
+                         &notOlder,
+                         NULL,
+                         &cld_closure);
 
   // Now mark from the roots
   MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
@@ -3172,16 +3171,6 @@
 }
 
 void
-ConcurrentMarkSweepGeneration::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  if (freelistLock()->owned_by_self()) {
-    Generation::oop_iterate(mr, cl);
-  } else {
-    MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
-    Generation::oop_iterate(mr, cl);
-  }
-}
-
-void
 ConcurrentMarkSweepGeneration::oop_iterate(ExtendedOopClosure* cl) {
   if (freelistLock()->owned_by_self()) {
     Generation::oop_iterate(cl);
@@ -3308,12 +3297,10 @@
 void CMSCollector::setup_cms_unloading_and_verification_state() {
   const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
                              || VerifyBeforeExit;
-  const  int  rso           =   SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+  const  int  rso           =   SharedHeap::SO_AllCodeCache;
 
   // We set the proper root for this CMS cycle here.
   if (should_unload_classes()) {   // Should unload classes this cycle
-    remove_root_scanning_option(SharedHeap::SO_AllClasses);
-    add_root_scanning_option(SharedHeap::SO_SystemClasses);
     remove_root_scanning_option(rso);  // Shrink the root set appropriately
     set_verifying(should_verify);    // Set verification state for this cycle
     return;                            // Nothing else needs to be done at this time
@@ -3321,8 +3308,6 @@
 
   // Not unloading classes this cycle
   assert(!should_unload_classes(), "Inconsitency!");
-  remove_root_scanning_option(SharedHeap::SO_SystemClasses);
-  add_root_scanning_option(SharedHeap::SO_AllClasses);
 
   if ((!verifying() || unloaded_classes_last_cycle()) && should_verify) {
     // Include symbols, strings and code cache elements to prevent their resurrection.
@@ -3688,12 +3673,6 @@
   ResourceMark rm;
   HandleMark  hm;
 
-  FalseClosure falseClosure;
-  // In the case of a synchronous collection, we will elide the
-  // remark step, so it's important to catch all the nmethod oops
-  // in this step.
-  // The final 'true' flag to gen_process_strong_roots will ensure this.
-  // If 'async' is true, we can relax the nmethod tracing.
   MarkRefsIntoClosure notOlder(_span, &_markBitMap);
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
@@ -3739,17 +3718,16 @@
       gch->set_par_threads(0);
     } else {
       // The serial version.
-      CMKlassClosure klass_closure(&notOlder);
+      CLDToOopClosure cld_closure(&notOlder, true);
       gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-      gch->gen_process_strong_roots(_cmsGen->level(),
-                                    true,   // younger gens are roots
-                                    true,   // activate StrongRootsScope
-                                    false,  // not scavenging
-                                    SharedHeap::ScanningOption(roots_scanning_options()),
-                                    &notOlder,
-                                    true,   // walk all of code cache if (so & SO_CodeCache)
-                                    NULL,
-                                    &klass_closure);
+      gch->gen_process_roots(_cmsGen->level(),
+                             true,   // younger gens are roots
+                             true,   // activate StrongRootsScope
+                             SharedHeap::ScanningOption(roots_scanning_options()),
+                             should_unload_classes(),
+                             &notOlder,
+                             NULL,
+                             &cld_closure);
     }
   }
 
@@ -4203,7 +4181,7 @@
   pst->all_tasks_completed();
 }
 
-class Par_ConcMarkingClosure: public CMSOopClosure {
+class Par_ConcMarkingClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   CMSConcMarkingTask* _task;
@@ -4216,7 +4194,7 @@
  public:
   Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue,
                          CMSBitMap* bit_map, CMSMarkStack* overflow_stack):
-    CMSOopClosure(collector->ref_processor()),
+    MetadataAwareOopClosure(collector->ref_processor()),
     _collector(collector),
     _task(task),
     _span(collector->_span),
@@ -4987,7 +4965,7 @@
 }
 
 class PrecleanKlassClosure : public KlassClosure {
-  CMKlassClosure _cm_klass_closure;
+  KlassToOopClosure _cm_klass_closure;
  public:
   PrecleanKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
   void do_klass(Klass* k) {
@@ -5225,7 +5203,6 @@
   _timer.start();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   Par_MarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
-  CMKlassClosure klass_closure(&par_mri_cl);
 
   // ---------- young gen roots --------------
   {
@@ -5241,17 +5218,19 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_strong_roots(_collector->_cmsGen->level(),
-                                false,     // yg was scanned above
-                                false,     // this is parallel code
-                                false,     // not scavenging
-                                SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
-                                &par_mri_cl,
-                                true,   // walk all of code cache if (so & SO_CodeCache)
-                                NULL,
-                                &klass_closure);
+
+  CLDToOopClosure cld_closure(&par_mri_cl, true);
+
+  gch->gen_process_roots(_collector->_cmsGen->level(),
+                         false,     // yg was scanned above
+                         false,     // this is parallel code
+                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         _collector->should_unload_classes(),
+                         &par_mri_cl,
+                         NULL,
+                         &cld_closure);
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5301,7 +5280,7 @@
 };
 
 class RemarkKlassClosure : public KlassClosure {
-  CMKlassClosure _cm_klass_closure;
+  KlassToOopClosure _cm_klass_closure;
  public:
   RemarkKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
   void do_klass(Klass* k) {
@@ -5378,17 +5357,17 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_strong_roots(_collector->_cmsGen->level(),
-                                false,     // yg was scanned above
-                                false,     // this is parallel code
-                                false,     // not scavenging
-                                SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
-                                &par_mrias_cl,
-                                true,   // walk all of code cache if (so & SO_CodeCache)
-                                NULL,
-                                NULL);     // The dirty klasses will be handled below
+  gch->gen_process_roots(_collector->_cmsGen->level(),
+                         false,     // yg was scanned above
+                         false,     // this is parallel code
+                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         _collector->should_unload_classes(),
+                         &par_mrias_cl,
+                         NULL,
+                         NULL);     // The dirty klasses will be handled below
+
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5441,7 +5420,7 @@
   // We might have added oops to ClassLoaderData::_handles during the
   // concurrent marking phase. These oops point to newly allocated objects
   // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the strong roots. Hence,
+  // code, or when the young collector processes the roots. Hence,
   // we don't have to revisit the _handles block during the remark phase.
 
   // ---------- rescan dirty cards ------------
@@ -5863,7 +5842,7 @@
     cms_space,
     n_workers, workers, task_queues());
 
-  // Set up for parallel process_strong_roots work.
+  // Set up for parallel process_roots work.
   gch->set_par_threads(n_workers);
   // We won't be iterating over the cards in the card table updating
   // the younger_gen cards, so we shouldn't call the following else
@@ -5872,7 +5851,7 @@
   // gch->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
 
   // The young gen rescan work will not be done as part of
-  // process_strong_roots (which currently doesn't knw how to
+  // process_roots (which currently doesn't know how to
   // parallelize such a scan), but rather will be broken up into
   // a set of parallel tasks (via the sampling that the [abortable]
   // preclean phase did of EdenSpace, plus the [two] tasks of
@@ -5969,18 +5948,18 @@
 
     gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
     GenCollectedHeap::StrongRootsScope srs(gch);
-    gch->gen_process_strong_roots(_cmsGen->level(),
-                                  true,  // younger gens as roots
-                                  false, // use the local StrongRootsScope
-                                  false, // not scavenging
-                                  SharedHeap::ScanningOption(roots_scanning_options()),
-                                  &mrias_cl,
-                                  true,   // walk code active on stacks
-                                  NULL,
-                                  NULL);  // The dirty klasses will be handled below
+
+    gch->gen_process_roots(_cmsGen->level(),
+                           true,  // younger gens as roots
+                           false, // use the local StrongRootsScope
+                           SharedHeap::ScanningOption(roots_scanning_options()),
+                           should_unload_classes(),
+                           &mrias_cl,
+                           NULL,
+                           NULL); // The dirty klasses will be handled below
 
     assert(should_unload_classes()
-           || (roots_scanning_options() & SharedHeap::SO_CodeCache),
+           || (roots_scanning_options() & SharedHeap::SO_AllCodeCache),
            "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   }
 
@@ -6017,7 +5996,7 @@
   // We might have added oops to ClassLoaderData::_handles during the
   // concurrent marking phase. These oops point to newly allocated objects
   // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the strong roots. Hence,
+  // code, or when the young collector processes the roots. Hence,
   // we don't have to revisit the _handles block during the remark phase.
 
   verify_work_stacks_empty();
@@ -6072,6 +6051,8 @@
 };
 
 void CMSRefProcTaskProxy::work(uint worker_id) {
+  ResourceMark rm;
+  HandleMark hm;
   assert(_collector->_span.equals(_span), "Inconsistency in _span");
   CMSParKeepAliveClosure par_keep_alive(_collector, _span,
                                         _mark_bit_map,
@@ -6267,15 +6248,14 @@
       // Clean up unreferenced symbols in symbol table.
       SymbolTable::unlink();
     }
-  }
-
-  // CMS doesn't use the StringTable as hard roots when class unloading is turned off.
-  // Need to check if we really scanned the StringTable.
-  if ((roots_scanning_options() & SharedHeap::SO_Strings) == 0) {
-    GCTraceTime t("scrub string table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
-    // Delete entries for dead interned strings.
-    StringTable::unlink(&_is_alive_closure);
-  }
+
+    {
+      GCTraceTime t("scrub string table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
+      // Delete entries for dead interned strings.
+      StringTable::unlink(&_is_alive_closure);
+    }
+  }
+
 
   // Restore any preserved marks as a result of mark stack or
   // work queue overflow
@@ -7744,7 +7724,7 @@
   CMSCollector* collector, MemRegion span,
   CMSBitMap* verification_bm, CMSBitMap* cms_bm,
   CMSMarkStack*  mark_stack):
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _span(span),
   _verification_bm(verification_bm),
@@ -7797,7 +7777,7 @@
                      MemRegion span,
                      CMSBitMap* bitMap, CMSMarkStack*  markStack,
                      HeapWord* finger, MarkFromRootsClosure* parent) :
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _span(span),
   _bitMap(bitMap),
@@ -7814,7 +7794,7 @@
                      HeapWord* finger,
                      HeapWord** global_finger_addr,
                      Par_MarkFromRootsClosure* parent) :
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _whole_span(collector->_span),
   _span(span),
@@ -7863,11 +7843,6 @@
   _overflow_stack->expand(); // expand the stack if possible
 }
 
-void CMKlassClosure::do_klass(Klass* k) {
-  assert(_oop_closure != NULL, "Not initialized?");
-  k->oops_do(_oop_closure);
-}
-
 void PushOrMarkClosure::do_oop(oop obj) {
   // Ignore mark word because we are running concurrent with mutators.
   assert(obj->is_oop_or_null(true), "expected an oop or NULL");
@@ -7965,7 +7940,7 @@
                                        CMSBitMap* mod_union_table,
                                        CMSMarkStack*  mark_stack,
                                        bool           concurrent_precleaning):
-  CMSOopClosure(rp),
+  MetadataAwareOopClosure(rp),
   _collector(collector),
   _span(span),
   _bit_map(bit_map),
@@ -8038,7 +8013,7 @@
                                                ReferenceProcessor* rp,
                                                CMSBitMap* bit_map,
                                                OopTaskQueue* work_queue):
-  CMSOopClosure(rp),
+  MetadataAwareOopClosure(rp),
   _collector(collector),
   _span(span),
   _bit_map(bit_map),
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -32,6 +32,7 @@
 #include "gc_implementation/shared/generationCounters.hpp"
 #include "memory/freeBlockDictionary.hpp"
 #include "memory/generation.hpp"
+#include "memory/iterator.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
 #include "services/memoryService.hpp"
@@ -1285,7 +1286,6 @@
   void save_sweep_limit();
 
   // More iteration support
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   virtual void oop_iterate(ExtendedOopClosure* cl);
   virtual void safe_object_iterate(ObjectClosure* cl);
   virtual void object_iterate(ObjectClosure* cl);
@@ -1383,13 +1383,6 @@
 // Closures of various sorts used by CMS to accomplish its work
 //
 
-// This closure is used to check that a certain set of oops is empty.
-class FalseClosure: public OopClosure {
- public:
-  void do_oop(oop* p)       { guarantee(false, "Should be an empty set"); }
-  void do_oop(narrowOop* p) { guarantee(false, "Should be an empty set"); }
-};
-
 // This closure is used to do concurrent marking from the roots
 // following the first checkpoint.
 class MarkFromRootsClosure: public BitMapClosure {
@@ -1454,7 +1447,7 @@
 
 // The following closures are used to do certain kinds of verification of
 // CMS marking.
-class PushAndMarkVerifyClosure: public CMSOopClosure {
+class PushAndMarkVerifyClosure: public MetadataAwareOopClosure {
   CMSCollector*    _collector;
   MemRegion        _span;
   CMSBitMap*       _verification_bm;
@@ -1507,6 +1500,19 @@
   }
 };
 
+// A version of ObjectClosure with "memory" (see _previous_address below)
+class UpwardsObjectClosure: public BoolObjectClosure {
+  HeapWord* _previous_address;
+ public:
+  UpwardsObjectClosure() : _previous_address(NULL) { }
+  void set_previous(HeapWord* addr) { _previous_address = addr; }
+  HeapWord* previous()              { return _previous_address; }
+  // A return value of "true" can be used by the caller to decide
+  // if this object's end should *NOT* be recorded in
+  // _previous_address above.
+  virtual bool do_object_bm(oop obj, MemRegion mr) = 0;
+};
+
 // This closure is used during the second checkpointing phase
 // to rescan the marked objects on the dirty cards in the mod
 // union table and the card table proper. It's invoked via
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/bufferingOopClosure.hpp"
+#include "memory/iterator.hpp"
+#include "utilities/debug.hpp"
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+class TestBufferingOopClosure {
+
+  // Helper class to fake a set of oop*s and narrowOop*s.
+  class FakeRoots {
+   public:
+    // Used for sanity checking of the values passed to the do_oops functions in the test.
+    static const uintptr_t NarrowOopMarker = uintptr_t(1) << (BitsPerWord -1);
+
+    int    _num_narrow;
+    int    _num_full;
+    void** _narrow;
+    void** _full;
+
+    FakeRoots(int num_narrow, int num_full) :
+        _num_narrow(num_narrow),
+        _num_full(num_full),
+        _narrow((void**)::malloc(sizeof(void*) * num_narrow)),
+        _full((void**)::malloc(sizeof(void*) * num_full)) {
+
+      for (int i = 0; i < num_narrow; i++) {
+        _narrow[i] = (void*)(NarrowOopMarker + (uintptr_t)i);
+      }
+      for (int i = 0; i < num_full; i++) {
+        _full[i] = (void*)(uintptr_t)i;
+      }
+    }
+
+    ~FakeRoots() {
+      ::free(_narrow);
+      ::free(_full);
+    }
+
+    void oops_do_narrow_then_full(OopClosure* cl) {
+      for (int i = 0; i < _num_narrow; i++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+      for (int i = 0; i < _num_full; i++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+    }
+
+    void oops_do_full_then_narrow(OopClosure* cl) {
+      for (int i = 0; i < _num_full; i++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+      for (int i = 0; i < _num_narrow; i++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+    }
+
+    void oops_do_mixed(OopClosure* cl) {
+      int i;
+      for (i = 0; i < _num_full && i < _num_narrow; i++) {
+        cl->do_oop((oop*)_full[i]);
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+      for (int j = i; j < _num_full; j++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+      for (int j = i; j < _num_narrow; j++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+    }
+
+    static const int MaxOrder = 2;
+
+    void oops_do(OopClosure* cl, int do_oop_order) {
+      switch(do_oop_order) {
+        case 0:
+          oops_do_narrow_then_full(cl);
+          break;
+        case 1:
+          oops_do_full_then_narrow(cl);
+          break;
+        case 2:
+          oops_do_mixed(cl);
+          break;
+        default:
+          oops_do_narrow_then_full(cl);
+          break;
+      }
+    }
+  };
+
+  class CountOopClosure : public OopClosure {
+    int _narrow_oop_count;
+    int _full_oop_count;
+   public:
+    CountOopClosure() : _narrow_oop_count(0), _full_oop_count(0) {}
+    void do_oop(narrowOop* p) {
+      assert((uintptr_t(p) & FakeRoots::NarrowOopMarker) != 0,
+          "The narrowOop was unexpectedly not marked with the NarrowOopMarker");
+      _narrow_oop_count++;
+    }
+
+    void do_oop(oop* p){
+      assert((uintptr_t(p) & FakeRoots::NarrowOopMarker) == 0,
+          "The oop was unexpectedly marked with the NarrowOopMarker");
+      _full_oop_count++;
+    }
+
+    int narrow_oop_count() { return _narrow_oop_count; }
+    int full_oop_count()   { return _full_oop_count; }
+    int all_oop_count()    { return _narrow_oop_count + _full_oop_count; }
+  };
+
+  class DoNothingOopClosure : public OopClosure {
+   public:
+    void do_oop(narrowOop* p) {}
+    void do_oop(oop* p)       {}
+  };
+
+  static void testCount(int num_narrow, int num_full, int do_oop_order) {
+    FakeRoots fr(num_narrow, num_full);
+
+    CountOopClosure coc;
+    BufferingOopClosure boc(&coc);
+
+    fr.oops_do(&boc, do_oop_order);
+
+    boc.done();
+
+    #define assert_testCount(got, expected)                                     \
+       assert((got) == (expected),                                              \
+           err_msg("Expected: %d, got: %d, when running testCount(%d, %d, %d)", \
+               (got), (expected), num_narrow, num_full, do_oop_order))
+
+    assert_testCount(num_narrow, coc.narrow_oop_count());
+    assert_testCount(num_full, coc.full_oop_count());
+    assert_testCount(num_narrow + num_full, coc.all_oop_count());
+  }
+
+  static void testCount() {
+    int buffer_length = BufferingOopClosure::BufferLength;
+
+    for (int order = 0; order < FakeRoots::MaxOrder; order++) {
+      testCount(0,                 0,                 order);
+      testCount(10,                0,                 order);
+      testCount(0,                 10,                order);
+      testCount(10,                10,                order);
+      testCount(buffer_length,     10,                order);
+      testCount(10,                buffer_length,     order);
+      testCount(buffer_length,     buffer_length,     order);
+      testCount(buffer_length + 1, 10,                order);
+      testCount(10,                buffer_length + 1, order);
+      testCount(buffer_length + 1, buffer_length,     order);
+      testCount(buffer_length,     buffer_length + 1, order);
+      testCount(buffer_length + 1, buffer_length + 1, order);
+    }
+  }
+
+  static void testIsBufferEmptyOrFull(int num_narrow, int num_full, bool expect_empty, bool expect_full) {
+    FakeRoots fr(num_narrow, num_full);
+
+    DoNothingOopClosure cl;
+    BufferingOopClosure boc(&cl);
+
+    fr.oops_do(&boc, 0);
+
+    #define assert_testIsBufferEmptyOrFull(got, expected)                             \
+        assert((got) == (expected),                                                   \
+            err_msg("Expected: %d, got: %d. testIsBufferEmptyOrFull(%d, %d, %s, %s)", \
+                (got), (expected), num_narrow, num_full,                              \
+                BOOL_TO_STR(expect_empty), BOOL_TO_STR(expect_full)))
+
+    assert_testIsBufferEmptyOrFull(expect_empty, boc.is_buffer_empty());
+    assert_testIsBufferEmptyOrFull(expect_full, boc.is_buffer_full());
+  }
+
+  static void testIsBufferEmptyOrFull() {
+    int bl = BufferingOopClosure::BufferLength;
+
+    testIsBufferEmptyOrFull(0,       0, true,  false);
+    testIsBufferEmptyOrFull(1,       0, false, false);
+    testIsBufferEmptyOrFull(0,       1, false, false);
+    testIsBufferEmptyOrFull(1,       1, false, false);
+    testIsBufferEmptyOrFull(10,      0, false, false);
+    testIsBufferEmptyOrFull(0,      10, false, false);
+    testIsBufferEmptyOrFull(10,     10, false, false);
+    testIsBufferEmptyOrFull(0,      bl, false, true);
+    testIsBufferEmptyOrFull(bl,      0, false, true);
+    testIsBufferEmptyOrFull(bl/2, bl/2, false, true);
+    testIsBufferEmptyOrFull(bl-1,    1, false, true);
+    testIsBufferEmptyOrFull(1,    bl-1, false, true);
+    // Processed
+    testIsBufferEmptyOrFull(bl+1,    0, false, false);
+    testIsBufferEmptyOrFull(bl*2,    0, false, true);
+  }
+
+  static void testEmptyAfterDone(int num_narrow, int num_full) {
+    FakeRoots fr(num_narrow, num_full);
+
+    DoNothingOopClosure cl;
+    BufferingOopClosure boc(&cl);
+
+    fr.oops_do(&boc, 0);
+
+    // Make sure all get processed.
+    boc.done();
+
+    assert(boc.is_buffer_empty(),
+        err_msg("Should be empty after call to done(). testEmptyAfterDone(%d, %d)",
+            num_narrow, num_full));
+  }
+
+  static void testEmptyAfterDone() {
+    int bl = BufferingOopClosure::BufferLength;
+
+    testEmptyAfterDone(0,       0);
+    testEmptyAfterDone(1,       0);
+    testEmptyAfterDone(0,       1);
+    testEmptyAfterDone(1,       1);
+    testEmptyAfterDone(10,      0);
+    testEmptyAfterDone(0,      10);
+    testEmptyAfterDone(10,     10);
+    testEmptyAfterDone(0,      bl);
+    testEmptyAfterDone(bl,      0);
+    testEmptyAfterDone(bl/2, bl/2);
+    testEmptyAfterDone(bl-1,    1);
+    testEmptyAfterDone(1,    bl-1);
+    // Processed
+    testEmptyAfterDone(bl+1,    0);
+    testEmptyAfterDone(bl*2,    0);
+  }
+
+  public:
+  static void test() {
+    testCount();
+    testIsBufferEmptyOrFull();
+    testEmptyAfterDone();
+  }
+};
+
+void TestBufferingOopClosure_test() {
+  TestBufferingOopClosure::test();
+}
+
+#endif
--- a/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,10 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_BUFFERINGOOPCLOSURE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_BUFFERINGOOPCLOSURE_HPP
 
-#include "memory/genOopClosures.hpp"
-#include "memory/generation.hpp"
+#include "memory/iterator.hpp"
+#include "oops/oopsHierarchy.hpp"
 #include "runtime/os.hpp"
-#include "utilities/taskqueue.hpp"
+#include "utilities/debug.hpp"
 
 // A BufferingOops closure tries to separate out the cost of finding roots
 // from the cost of applying closures to them.  It maintains an array of
@@ -41,60 +41,103 @@
 // The caller must be sure to call "done" to process any unprocessed
 // buffered entriess.
 
-class Generation;
-class HeapRegion;
-
 class BufferingOopClosure: public OopClosure {
+  friend class TestBufferingOopClosure;
 protected:
-  enum PrivateConstants {
-    BufferLength = 1024
-  };
+  static const size_t BufferLength = 1024;
 
-  StarTask  _buffer[BufferLength];
-  StarTask* _buffer_top;
-  StarTask* _buffer_curr;
+  // We need to know if the buffered addresses contain oops or narrowOops.
+  // We can't tag the addresses the way StarTask does, because we need to
+  // be able to handle unaligned addresses coming from oops embedded in code.
+  //
+  // The addresses for the full-sized oops are filled in from the bottom,
+  // while the addresses for the narrowOops are filled in from the top.
+  OopOrNarrowOopStar  _buffer[BufferLength];
+  OopOrNarrowOopStar* _oop_top;
+  OopOrNarrowOopStar* _narrowOop_bottom;
 
   OopClosure* _oc;
   double      _closure_app_seconds;
 
-  void process_buffer () {
+
+  bool is_buffer_empty() {
+    return _oop_top == _buffer && _narrowOop_bottom == (_buffer + BufferLength - 1);
+  }
+
+  bool is_buffer_full() {
+    return _narrowOop_bottom < _oop_top;
+  }
+
+  // Process addresses containing full-sized oops.
+  void process_oops() {
+    for (OopOrNarrowOopStar* curr = _buffer; curr < _oop_top; ++curr) {
+      _oc->do_oop((oop*)(*curr));
+    }
+    _oop_top = _buffer;
+  }
+
+  // Process addresses containing narrow oops.
+  void process_narrowOops() {
+    for (OopOrNarrowOopStar* curr = _buffer + BufferLength - 1; curr > _narrowOop_bottom; --curr) {
+      _oc->do_oop((narrowOop*)(*curr));
+    }
+    _narrowOop_bottom = _buffer + BufferLength - 1;
+  }
+
+  // Apply the closure to all oops and clear the buffer.
+  // Accumulate the time it took.
+  void process_buffer() {
     double start = os::elapsedTime();
-    for (StarTask* curr = _buffer; curr < _buffer_curr; ++curr) {
-      if (curr->is_narrow()) {
-        assert(UseCompressedOops, "Error");
-        _oc->do_oop((narrowOop*)(*curr));
-      } else {
-        _oc->do_oop((oop*)(*curr));
-      }
-    }
-    _buffer_curr = _buffer;
+
+    process_oops();
+    process_narrowOops();
+
     _closure_app_seconds += (os::elapsedTime() - start);
   }
 
-  template <class T> inline void do_oop_work(T* p) {
-    if (_buffer_curr == _buffer_top) {
+  void process_buffer_if_full() {
+    if (is_buffer_full()) {
       process_buffer();
     }
-    StarTask new_ref(p);
-    *_buffer_curr = new_ref;
-    ++_buffer_curr;
+  }
+
+  void add_narrowOop(narrowOop* p) {
+    assert(!is_buffer_full(), "Buffer should not be full");
+    *_narrowOop_bottom = (OopOrNarrowOopStar)p;
+    _narrowOop_bottom--;
+  }
+
+  void add_oop(oop* p) {
+    assert(!is_buffer_full(), "Buffer should not be full");
+    *_oop_top = (OopOrNarrowOopStar)p;
+    _oop_top++;
   }
 
 public:
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(oop* p)       { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) {
+    process_buffer_if_full();
+    add_narrowOop(p);
+  }
 
-  void done () {
-    if (_buffer_curr > _buffer) {
+  virtual void do_oop(oop* p)       {
+    process_buffer_if_full();
+    add_oop(p);
+  }
+
+  void done() {
+    if (!is_buffer_empty()) {
       process_buffer();
     }
   }
-  double closure_app_seconds () {
+
+  double closure_app_seconds() {
     return _closure_app_seconds;
   }
-  BufferingOopClosure (OopClosure *oc) :
+
+  BufferingOopClosure(OopClosure *oc) :
     _oc(oc),
-    _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
+    _oop_top(_buffer),
+    _narrowOop_bottom(_buffer + BufferLength - 1),
     _closure_app_seconds(0.0) { }
 };
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/symbolTable.hpp"
+#include "code/codeCache.hpp"
 #include "gc_implementation/g1/concurrentMark.inline.hpp"
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
@@ -39,6 +40,7 @@
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
+#include "memory/allocation.hpp"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/resourceArea.hpp"
@@ -57,8 +59,8 @@
   _bmWordSize = 0;
 }
 
-HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
-                                               HeapWord* limit) const {
+HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
+                                               const HeapWord* limit) const {
   // First we must round addr *up* to a possible object boundary.
   addr = (HeapWord*)align_size_up((intptr_t)addr,
                                   HeapWordSize << _shifter);
@@ -75,8 +77,8 @@
   return nextAddr;
 }
 
-HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
-                                                 HeapWord* limit) const {
+HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
+                                                 const HeapWord* limit) const {
   size_t addrOffset = heapWordToOffset(addr);
   if (limit == NULL) {
     limit = _bmStartWord + _bmWordSize;
@@ -888,6 +890,10 @@
   guarantee(!g1h->mark_in_progress(), "invariant");
 }
 
+bool ConcurrentMark::nextMarkBitmapIsClear() {
+  return _nextMarkBitMap->getNextMarkedWordAddress(_heap_start, _heap_end) == _heap_end;
+}
+
 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
   bool doHeapRegion(HeapRegion* r) {
@@ -1222,6 +1228,9 @@
 };
 
 void ConcurrentMark::scanRootRegions() {
+  // Start of concurrent marking.
+  ClassLoaderDataGraph::clear_claimed_marks();
+
   // scan_in_progress() will have been set to true only if there was
   // at least one root region to scan. So, if it's false, we
   // should not attempt to do any further work.
@@ -1270,7 +1279,7 @@
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (use_parallel_marking_threads()) {
     _parallel_workers->set_active_workers((int)active_workers);
-    // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
+    // Don't set _n_par_threads because it affects MT in process_roots()
     // and the decisions on that MT processing is made elsewhere.
     assert(_parallel_workers->active_workers() > 0, "Should have been set");
     _parallel_workers->run_task(&markingTask);
@@ -1301,6 +1310,7 @@
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(before)");
   }
+  g1h->check_bitmaps("Remark Start");
 
   G1CollectorPolicy* g1p = g1h->g1_policy();
   g1p->record_concurrent_mark_remark_start();
@@ -1349,6 +1359,7 @@
       Universe::verify(VerifyOption_G1UseNextMarking,
                        " VerifyDuringGC:(after)");
     }
+    g1h->check_bitmaps("Remark End");
     assert(!restart_for_overflow(), "sanity");
     // Completely reset the marking state since marking completed
     set_non_marking_state();
@@ -1998,6 +2009,7 @@
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(before)");
   }
+  g1h->check_bitmaps("Cleanup Start");
 
   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   g1p->record_concurrent_mark_cleanup_start();
@@ -2035,8 +2047,8 @@
     // that calculated by walking the marking bitmap.
 
     // Bitmaps to hold expected values
-    BitMap expected_region_bm(_region_bm.size(), false);
-    BitMap expected_card_bm(_card_bm.size(), false);
+    BitMap expected_region_bm(_region_bm.size(), true);
+    BitMap expected_card_bm(_card_bm.size(), true);
 
     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                  &_region_bm,
@@ -2138,22 +2150,30 @@
   // Update the soft reference policy with the new heap occupancy.
   Universe::update_heap_info_at_gc();
 
-  // We need to make this be a "collection" so any collection pause that
-  // races with it goes around and waits for completeCleanup to finish.
-  g1h->increment_total_collections();
-
-  // We reclaimed old regions so we should calculate the sizes to make
-  // sure we update the old gen/space data.
-  g1h->g1mm()->update_sizes();
-
   if (VerifyDuringGC) {
     HandleMark hm;  // handle scope
     Universe::heap()->prepare_for_verify();
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(after)");
   }
+  g1h->check_bitmaps("Cleanup End");
 
   g1h->verify_region_sets_optional();
+
+  // We need to make this be a "collection" so any collection pause that
+  // races with it goes around and waits for completeCleanup to finish.
+  g1h->increment_total_collections();
+
+  // Clean out dead classes and update Metaspace sizes.
+  if (ClassUnloadingWithConcurrentMark) {
+    ClassLoaderDataGraph::purge();
+  }
+  MetaspaceGC::compute_new_size();
+
+  // We reclaimed old regions so we should calculate the sizes to make
+  // sure we update the old gen/space data.
+  g1h->g1mm()->update_sizes();
+
   g1h->trace_heap_after_concurrent_cycle();
 }
 
@@ -2383,6 +2403,8 @@
   }
 
   virtual void work(uint worker_id) {
+    ResourceMark rm;
+    HandleMark hm;
     CMTask* task = _cm->task(worker_id);
     G1CMIsAliveClosure g1_is_alive(_g1h);
     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
@@ -2440,6 +2462,26 @@
   _g1h->set_par_threads(0);
 }
 
+void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
+  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
+}
+
+// Helper class to get rid of some boilerplate code.
+class G1RemarkGCTraceTime : public GCTraceTime {
+  static bool doit_and_prepend(bool doit) {
+    if (doit) {
+      gclog_or_tty->put(' ');
+    }
+    return doit;
+  }
+
+ public:
+  G1RemarkGCTraceTime(const char* title, bool doit)
+    : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
+        G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
+  }
+};
+
 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
   if (has_overflown()) {
     // Skip processing the discovered references if we have
@@ -2552,9 +2594,31 @@
     return;
   }
 
-  g1h->unlink_string_and_symbol_table(&g1_is_alive,
-                                      /* process_strings */ false, // currently strings are always roots
-                                      /* process_symbols */ true);
+  assert(_markStack.isEmpty(), "Marking should have completed");
+
+  // Unload Klasses, String, Symbols, Code Cache, etc.
+  {
+    G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
+
+    if (ClassUnloadingWithConcurrentMark) {
+      bool purged_classes;
+
+      {
+        G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
+        purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
+      }
+
+      {
+        G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
+        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
+      }
+    }
+
+    if (G1StringDedup::is_enabled()) {
+      G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
+      G1StringDedup::unlink(&g1_is_alive);
+    }
+  }
 }
 
 void ConcurrentMark::swapMarkBitMaps() {
@@ -2563,6 +2627,57 @@
   _nextMarkBitMap  = (CMBitMap*)  temp;
 }
 
+class CMObjectClosure;
+
+// Closure for iterating over objects, currently only used for
+// processing SATB buffers.
+class CMObjectClosure : public ObjectClosure {
+private:
+  CMTask* _task;
+
+public:
+  void do_object(oop obj) {
+    _task->deal_with_reference(obj);
+  }
+
+  CMObjectClosure(CMTask* task) : _task(task) { }
+};
+
+class G1RemarkThreadsClosure : public ThreadClosure {
+  CMObjectClosure _cm_obj;
+  G1CMOopClosure _cm_cl;
+  MarkingCodeBlobClosure _code_cl;
+  int _thread_parity;
+  bool _is_par;
+
+ public:
+  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
+    _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
+    _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
+
+  void do_thread(Thread* thread) {
+    if (thread->is_Java_thread()) {
+      if (thread->claim_oops_do(_is_par, _thread_parity)) {
+        JavaThread* jt = (JavaThread*)thread;
+
+        // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
+        // however the liveness of oops reachable from nmethods have very complex lifecycles:
+        // * Alive if on the stack of an executing method
+        // * Weakly reachable otherwise
+        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
+        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
+        jt->nmethods_do(&_code_cl);
+
+        jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
+      }
+    } else if (thread->is_VM_thread()) {
+      if (thread->claim_oops_do(_is_par, _thread_parity)) {
+        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
+      }
+    }
+  }
+};
+
 class CMRemarkTask: public AbstractGangTask {
 private:
   ConcurrentMark* _cm;
@@ -2574,6 +2689,14 @@
     if (worker_id < _cm->active_tasks()) {
       CMTask* task = _cm->task(worker_id);
       task->record_start_time();
+      {
+        ResourceMark rm;
+        HandleMark hm;
+
+        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
+        Threads::threads_do(&threads_f);
+      }
+
       do {
         task->do_marking_step(1000000000.0 /* something very large */,
                               true         /* do_termination       */,
@@ -2596,6 +2719,8 @@
   HandleMark   hm;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
+  G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
+
   g1h->ensure_parsability(false);
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
@@ -3243,8 +3368,14 @@
 
 // abandon current marking iteration due to a Full GC
 void ConcurrentMark::abort() {
-  // Clear all marks to force marking thread to do nothing
+  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
+  // concurrent bitmap clearing.
   _nextMarkBitMap->clearAll();
+
+  // Note we cannot clear the previous marking bitmap here
+  // since VerifyDuringGC verifies the objects marked during
+  // a full GC against the previous bitmap.
+
   // Clear the liveness counting data
   clear_all_count_data();
   // Empty mark stack
@@ -3421,20 +3552,6 @@
   }
 };
 
-// Closure for iterating over objects, currently only used for
-// processing SATB buffers.
-class CMObjectClosure : public ObjectClosure {
-private:
-  CMTask* _task;
-
-public:
-  void do_object(oop obj) {
-    _task->deal_with_reference(obj);
-  }
-
-  CMObjectClosure(CMTask* task) : _task(task) { }
-};
-
 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                                ConcurrentMark* cm,
                                CMTask* task)
@@ -3900,15 +4017,6 @@
     }
   }
 
-  if (!concurrent() && !has_aborted()) {
-    // We should only do this during remark.
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      satb_mq_set.par_iterate_closure_all_threads(_worker_id);
-    } else {
-      satb_mq_set.iterate_closure_all_threads();
-    }
-  }
-
   _draining_satb_buffers = false;
 
   assert(has_aborted() ||
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 
+#include "classfile/javaClasses.hpp"
 #include "gc_implementation/g1/heapRegionSet.hpp"
 #include "gc_implementation/shared/gcId.hpp"
 #include "utilities/taskqueue.hpp"
@@ -86,19 +87,19 @@
   // Return the address corresponding to the next marked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
   // such bit, returns "limit" if that is non-NULL, or else "endWord()".
-  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
-                                     HeapWord* limit = NULL) const;
+  HeapWord* getNextMarkedWordAddress(const HeapWord* addr,
+                                     const HeapWord* limit = NULL) const;
   // Return the address corresponding to the next unmarked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
   // such bit, returns "limit" if that is non-NULL, or else "endWord()".
-  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
-                                       HeapWord* limit = NULL) const;
+  HeapWord* getNextUnmarkedWordAddress(const HeapWord* addr,
+                                       const HeapWord* limit = NULL) const;
 
   // conversion utilities
   HeapWord* offsetToHeapWord(size_t offset) const {
     return _bmStartWord + (offset << _shifter);
   }
-  size_t heapWordToOffset(HeapWord* addr) const {
+  size_t heapWordToOffset(const HeapWord* addr) const {
     return pointer_delta(addr, _bmStartWord) >> _shifter;
   }
   int heapWordDiffToOffsetDiff(size_t diff) const;
@@ -476,6 +477,7 @@
   ForceOverflowSettings _force_overflow_conc;
   ForceOverflowSettings _force_overflow_stw;
 
+  void weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes);
   void weakRefsWork(bool clear_all_soft_refs);
 
   void swapMarkBitMaps();
@@ -734,6 +736,9 @@
   // Clear the next marking bitmap (will be called concurrently).
   void clearNextBitmap();
 
+  // Return whether the next mark bitmap has no marks set.
+  bool nextMarkBitmapIsClear();
+
   // These two do the work that needs to be done before and after the
   // initial root checkpoint. Since this checkpoint can be done at two
   // different points (i.e. an explicit pause or piggy-backed on a
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -275,9 +275,13 @@
 
       // We now want to allow clearing of the marking bitmap to be
       // suspended by a collection pause.
-      {
+      // We may have aborted just before the remark. Do not bother clearing the
+      // bitmap then, as it has been done during mark abort.
+      if (!cm()->has_aborted()) {
         SuspendibleThreadSetJoiner sts;
         _cm->clearNextBitmap();
+      } else {
+        assert(!G1VerifyBitmaps || _cm->nextMarkBitmapIsClear(), "Next mark bitmap must be clear");
       }
     }
 
--- a/src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
 
 #include "gc_implementation/g1/g1AllocRegion.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 
 inline HeapWord* G1AllocRegion::allocate(HeapRegion* alloc_region,
                                          size_t word_size,
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
 #include "memory/space.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
@@ -98,6 +99,20 @@
   return (delta & right_n_bits(LogN_words)) == (size_t)NoBits;
 }
 
+void G1BlockOffsetSharedArray::set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
+  check_index(index_for(right - 1), "right address out of range");
+  assert(left  < right, "Heap addresses out of order");
+  size_t num_cards = pointer_delta(right, left) >> LogN_words;
+  if (UseMemSetInBOT) {
+    memset(&_offset_array[index_for(left)], offset, num_cards);
+  } else {
+    size_t i = index_for(left);
+    const size_t end = i + num_cards;
+    for (; i < end; i++) {
+      _offset_array[i] = offset;
+    }
+  }
+}
 
 //////////////////////////////////////////////////////////////////////
 // G1BlockOffsetArray
@@ -107,7 +122,7 @@
                                        MemRegion mr, bool init_to_zero) :
   G1BlockOffsetTable(mr.start(), mr.end()),
   _unallocated_block(_bottom),
-  _array(array), _csp(NULL),
+  _array(array), _gsp(NULL),
   _init_to_zero(init_to_zero) {
   assert(_bottom <= _end, "arguments out of order");
   if (!_init_to_zero) {
@@ -117,9 +132,8 @@
   }
 }
 
-void G1BlockOffsetArray::set_space(Space* sp) {
-  _sp = sp;
-  _csp = sp->toContiguousSpace();
+void G1BlockOffsetArray::set_space(G1OffsetTableContigSpace* sp) {
+  _gsp = sp;
 }
 
 // The arguments follow the normal convention of denoting
@@ -378,7 +392,7 @@
   }
   // Otherwise, find the block start using the table.
   HeapWord* q = block_at_or_preceding(addr, false, 0);
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   return forward_to_block_containing_addr_const(q, n, addr);
 }
 
@@ -406,31 +420,17 @@
          err_msg("next_boundary is beyond the end of the covered region "
                  " next_boundary " PTR_FORMAT " _array->_end " PTR_FORMAT,
                  next_boundary, _array->_end));
-  if (csp() != NULL) {
-    if (addr >= csp()->top()) return csp()->top();
-    while (next_boundary < addr) {
-      while (n <= next_boundary) {
-        q = n;
-        oop obj = oop(q);
-        if (obj->klass_or_null() == NULL) return q;
-        n += obj->size();
-      }
-      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
-      // [q, n) is the block that crosses the boundary.
-      alloc_block_work2(&next_boundary, &next_index, q, n);
+  if (addr >= gsp()->top()) return gsp()->top();
+  while (next_boundary < addr) {
+    while (n <= next_boundary) {
+      q = n;
+      oop obj = oop(q);
+      if (obj->klass_or_null() == NULL) return q;
+      n += block_size(q);
     }
-  } else {
-    while (next_boundary < addr) {
-      while (n <= next_boundary) {
-        q = n;
-        oop obj = oop(q);
-        if (obj->klass_or_null() == NULL) return q;
-        n += _sp->block_size(q);
-      }
-      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
-      // [q, n) is the block that crosses the boundary.
-      alloc_block_work2(&next_boundary, &next_index, q, n);
-    }
+    assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
+    // [q, n) is the block that crosses the boundary.
+    alloc_block_work2(&next_boundary, &next_index, q, n);
   }
   return forward_to_block_containing_addr_const(q, n, addr);
 }
@@ -638,7 +638,7 @@
   assert(_bottom <= addr && addr < _end,
          "addr must be covered by this Array");
   HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   return forward_to_block_containing_addr_const(q, n, addr);
 }
 
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -52,8 +52,8 @@
 // consolidation.
 
 // Forward declarations
-class ContiguousSpace;
 class G1BlockOffsetSharedArray;
+class G1OffsetTableContigSpace;
 
 class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC {
   friend class VMStructs;
@@ -157,6 +157,8 @@
     return _offset_array[index];
   }
 
+  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset);
+
   void set_offset_array(size_t index, u_char offset) {
     check_index(index, "index out of range");
     check_offset(offset, "offset too large");
@@ -170,21 +172,6 @@
     _offset_array[index] = (u_char) pointer_delta(high, low);
   }
 
-  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
-    check_index(index_for(right - 1), "right address out of range");
-    assert(left  < right, "Heap addresses out of order");
-    size_t num_cards = pointer_delta(right, left) >> LogN_words;
-    if (UseMemSetInBOT) {
-      memset(&_offset_array[index_for(left)], offset, num_cards);
-    } else {
-      size_t i = index_for(left);
-      const size_t end = i + num_cards;
-      for (; i < end; i++) {
-        _offset_array[i] = offset;
-      }
-    }
-  }
-
   void set_offset_array(size_t left, size_t right, u_char offset) {
     check_index(right, "right index out of range");
     assert(left <= right, "indexes out of order");
@@ -281,11 +268,7 @@
   G1BlockOffsetSharedArray* _array;
 
   // The space that owns this subregion.
-  Space* _sp;
-
-  // If "_sp" is a contiguous space, the field below is the view of "_sp"
-  // as a contiguous space, else NULL.
-  ContiguousSpace* _csp;
+  G1OffsetTableContigSpace* _gsp;
 
   // If true, array entries are initialized to 0; otherwise, they are
   // initialized to point backwards to the beginning of the covered region.
@@ -310,7 +293,9 @@
 
 protected:
 
-  ContiguousSpace* csp() const { return _csp; }
+  G1OffsetTableContigSpace* gsp() const { return _gsp; }
+
+  inline size_t block_size(const HeapWord* p) const;
 
   // Returns the address of a block whose start is at most "addr".
   // If "has_max_index" is true, "assumes "max_index" is the last valid one
@@ -363,7 +348,7 @@
   // "this" to be passed as a parameter to a member constructor for
   // the containing concrete subtype of Space.
   // This would be legal C++, but MS VC++ doesn't allow it.
-  void set_space(Space* sp);
+  void set_space(G1OffsetTableContigSpace* sp);
 
   // Resets the covered region to the given "mr".
   void set_region(MemRegion mr);
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,8 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1BLOCKOFFSETTABLE_INLINE_HPP
 
 #include "gc_implementation/g1/g1BlockOffsetTable.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "memory/space.hpp"
 
 inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
@@ -69,6 +71,11 @@
   return result;
 }
 
+inline size_t
+G1BlockOffsetArray::block_size(const HeapWord* p) const {
+  return gsp()->block_size(p);
+}
+
 inline HeapWord*
 G1BlockOffsetArray::block_at_or_preceding(const void* addr,
                                           bool has_max_index,
@@ -88,7 +95,7 @@
     // to go back by.
     size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset);
     q -= (N_words * n_cards_back);
-    assert(q >= _sp->bottom(), "Went below bottom!");
+    assert(q >= gsp()->bottom(), "Went below bottom!");
     index -= n_cards_back;
     offset = _array->offset_array(index);
   }
@@ -101,21 +108,12 @@
 G1BlockOffsetArray::
 forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
                                        const void* addr) const {
-  if (csp() != NULL) {
-    if (addr >= csp()->top()) return csp()->top();
-    while (n <= addr) {
-      q = n;
-      oop obj = oop(q);
-      if (obj->klass_or_null() == NULL) return q;
-      n += obj->size();
-    }
-  } else {
-    while (n <= addr) {
-      q = n;
-      oop obj = oop(q);
-      if (obj->klass_or_null() == NULL) return q;
-      n += _sp->block_size(q);
-    }
+  if (addr >= gsp()->top()) return gsp()->top();
+  while (n <= addr) {
+    q = n;
+    oop obj = oop(q);
+    if (obj->klass_or_null() == NULL) return q;
+    n += block_size(q);
   }
   assert(q <= n, "wrong order for q and addr");
   assert(addr < n, "wrong order for addr and n");
@@ -126,7 +124,7 @@
 G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
                                                      const void* addr) {
   if (oop(q)->klass_or_null() == NULL) return q;
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   // In the normal case, where the query "addr" is a card boundary, and the
   // offset table chunks are the same size as cards, the block starting at
   // "q" will contain addr, so the test below will fail, and we'll fall
--- a/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -30,23 +30,52 @@
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
-G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL) {
+G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL), _free(NULL) {
   _top = bottom();
 }
 
 void G1CodeRootChunk::reset() {
   _next = _prev = NULL;
+  _free = NULL;
   _top = bottom();
 }
 
 void G1CodeRootChunk::nmethods_do(CodeBlobClosure* cl) {
-  nmethod** cur = bottom();
+  NmethodOrLink* cur = bottom();
   while (cur != _top) {
-    cl->do_code_blob(*cur);
+    if (is_nmethod(cur)) {
+      cl->do_code_blob(cur->_nmethod);
+    }
     cur++;
   }
 }
 
+bool G1CodeRootChunk::remove_lock_free(nmethod* method) {
+  NmethodOrLink* cur = bottom();
+
+  for (NmethodOrLink* cur = bottom(); cur != _top; cur++) {
+    if (cur->_nmethod == method) {
+      bool result = Atomic::cmpxchg_ptr(NULL, &cur->_nmethod, method) == method;
+
+      if (!result) {
+        // Someone else cleared out this entry.
+        return false;
+      }
+
+      // The method was cleared. Time to link it into the free list.
+      NmethodOrLink* prev_free;
+      do {
+        prev_free = (NmethodOrLink*)_free;
+        cur->_link = prev_free;
+      } while (Atomic::cmpxchg_ptr(cur, &_free, prev_free) != prev_free);
+
+      return true;
+    }
+  }
+
+  return false;
+}
+
 G1CodeRootChunkManager::G1CodeRootChunkManager() : _free_list(), _num_chunks_handed_out(0) {
   _free_list.initialize();
   _free_list.set_size(G1CodeRootChunk::word_size());
@@ -140,34 +169,43 @@
 
 void G1CodeRootSet::add(nmethod* method) {
   if (!contains(method)) {
-    // Try to add the nmethod. If there is not enough space, get a new chunk.
-    if (_list.head() == NULL || _list.head()->is_full()) {
-      G1CodeRootChunk* cur = new_chunk();
+    // Find the first chunk that isn't full.
+    G1CodeRootChunk* cur = _list.head();
+    while (cur != NULL) {
+      if (!cur->is_full()) {
+        break;
+      }
+      cur = cur->next();
+    }
+
+    // All chunks are full, get a new chunk.
+    if (cur == NULL) {
+      cur = new_chunk();
       _list.return_chunk_at_head(cur);
     }
-    bool result = _list.head()->add(method);
+
+    // Add the nmethod.
+    bool result = cur->add(method);
+
     guarantee(result, err_msg("Not able to add nmethod "PTR_FORMAT" to newly allocated chunk.", method));
+
     _length++;
   }
 }
 
-void G1CodeRootSet::remove(nmethod* method) {
+void G1CodeRootSet::remove_lock_free(nmethod* method) {
   G1CodeRootChunk* found = find(method);
   if (found != NULL) {
-    bool result = found->remove(method);
-    guarantee(result, err_msg("could not find nmethod "PTR_FORMAT" during removal although we previously found it", method));
-    // eventually free completely emptied chunk
-    if (found->is_empty()) {
-      _list.remove_chunk(found);
-      free(found);
+    bool result = found->remove_lock_free(method);
+    if (result) {
+      Atomic::dec_ptr((volatile intptr_t*)&_length);
     }
-    _length--;
   }
   assert(!contains(method), err_msg(PTR_FORMAT" still contains nmethod "PTR_FORMAT, this, method));
 }
 
 nmethod* G1CodeRootSet::pop() {
-  do {
+  while (true) {
     G1CodeRootChunk* cur = _list.head();
     if (cur == NULL) {
       assert(_length == 0, "when there are no chunks, there should be no elements");
@@ -180,7 +218,7 @@
     } else {
       free(_list.get_chunk_at_head());
     }
-  } while (true);
+  }
 }
 
 G1CodeRootChunk* G1CodeRootSet::find(nmethod* method) {
--- a/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -31,6 +31,14 @@
 
 class CodeBlobClosure;
 
+// The elements of the G1CodeRootChunk is either:
+//  1) nmethod pointers
+//  2) nodes in an internally chained free list
+typedef union {
+  nmethod* _nmethod;
+  void*    _link;
+} NmethodOrLink;
+
 class G1CodeRootChunk : public CHeapObj<mtGC> {
  private:
   static const int NUM_ENTRIES = 32;
@@ -38,16 +46,28 @@
   G1CodeRootChunk*     _next;
   G1CodeRootChunk*     _prev;
 
-  nmethod** _top;
+  NmethodOrLink*          _top;
+  // First free position within the chunk.
+  volatile NmethodOrLink* _free;
 
-  nmethod* _data[NUM_ENTRIES];
+  NmethodOrLink _data[NUM_ENTRIES];
 
-  nmethod** bottom() const {
-    return (nmethod**) &(_data[0]);
+  NmethodOrLink* bottom() const {
+    return (NmethodOrLink*) &(_data[0]);
   }
 
-  nmethod** end() const {
-    return (nmethod**) &(_data[NUM_ENTRIES]);
+  NmethodOrLink* end() const {
+    return (NmethodOrLink*) &(_data[NUM_ENTRIES]);
+  }
+
+  bool is_link(NmethodOrLink* nmethod_or_link) {
+    return nmethod_or_link->_link == NULL ||
+        (bottom() <= nmethod_or_link->_link
+        && nmethod_or_link->_link < end());
+  }
+
+  bool is_nmethod(NmethodOrLink* nmethod_or_link) {
+    return !is_link(nmethod_or_link);
   }
 
  public:
@@ -85,46 +105,55 @@
   }
 
   bool is_full() const {
-    return _top == (nmethod**)end();
+    return _top == end() && _free == NULL;
   }
 
   bool contains(nmethod* method) {
-    nmethod** cur = bottom();
+    NmethodOrLink* cur = bottom();
     while (cur != _top) {
-      if (*cur == method) return true;
+      if (cur->_nmethod == method) return true;
       cur++;
     }
     return false;
   }
 
   bool add(nmethod* method) {
-    if (is_full()) return false;
-    *_top = method;
-    _top++;
+    if (is_full()) {
+      return false;
+    }
+
+    if (_free != NULL) {
+      // Take from internally chained free list
+      NmethodOrLink* first_free = (NmethodOrLink*)_free;
+      _free = (NmethodOrLink*)_free->_link;
+      first_free->_nmethod = method;
+    } else {
+      // Take from top.
+      _top->_nmethod = method;
+      _top++;
+    }
+
     return true;
   }
 
-  bool remove(nmethod* method) {
-    nmethod** cur = bottom();
-    while (cur != _top) {
-      if (*cur == method) {
-        memmove(cur, cur + 1, (_top - (cur + 1)) * sizeof(nmethod**));
-        _top--;
-        return true;
-      }
-      cur++;
-    }
-    return false;
-  }
+  bool remove_lock_free(nmethod* method);
 
   void nmethods_do(CodeBlobClosure* blk);
 
   nmethod* pop() {
-    if (is_empty()) {
-      return NULL;
+    if (_free != NULL) {
+      // Kill the free list.
+      _free = NULL;
     }
-    _top--;
-    return *_top;
+
+    while (!is_empty()) {
+      _top--;
+      if (is_nmethod(_top)) {
+        return _top->_nmethod;
+      }
+    }
+
+    return NULL;
   }
 };
 
@@ -193,7 +222,7 @@
   // method is likely to be repeatedly called with the same nmethod.
   void add(nmethod* method);
 
-  void remove(nmethod* method);
+  void remove_lock_free(nmethod* method);
   nmethod* pop();
 
   bool contains(nmethod* method);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -55,6 +55,7 @@
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
 #include "gc_implementation/shared/isGCActiveMark.hpp"
+#include "memory/allocation.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "memory/generationSpec.hpp"
 #include "memory/iterator.hpp"
@@ -87,10 +88,10 @@
 // G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
 // The number of GC workers is passed to heap_region_par_iterate_chunked().
 // It does use run_task() which sets _n_workers in the task.
-// G1ParTask executes g1_process_strong_roots() ->
-// SharedHeap::process_strong_roots() which calls eventually to
+// G1ParTask executes g1_process_roots() ->
+// SharedHeap::process_roots() which calls eventually to
 // CardTableModRefBS::par_non_clean_card_iterate_work() which uses
-// SequentialSubTasksDone.  SharedHeap::process_strong_roots() also
+// SequentialSubTasksDone.  SharedHeap::process_roots() also
 // directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
 //
 
@@ -770,6 +771,7 @@
   // match new_top.
   assert(hr == NULL ||
          (hr->end() == new_end && hr->top() == new_top), "sanity");
+  check_bitmaps("Humongous Region Allocation", first_hr);
 
   assert(first_hr->used() == word_size * HeapWordSize, "invariant");
   _summary_bytes_used += first_hr->used();
@@ -1328,6 +1330,7 @@
 
       verify_before_gc();
 
+      check_bitmaps("Full GC Start");
       pre_full_gc_dump(gc_timer);
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
@@ -1501,6 +1504,18 @@
 
       verify_after_gc();
 
+      // Clear the previous marking bitmap, if needed for bitmap verification.
+      // Note we cannot do this when we clear the next marking bitmap in
+      // ConcurrentMark::abort() above since VerifyDuringGC verifies the
+      // objects marked during a full GC against the previous bitmap.
+      // But we need to clear it before calling check_bitmaps below since
+      // the full GC has compacted objects and updated TAMS but not updated
+      // the prev bitmap.
+      if (G1VerifyBitmaps) {
+        ((CMBitMap*) concurrent_mark()->prevMarkBitMap())->clearAll();
+      }
+      check_bitmaps("Full GC End");
+
       // Start a new incremental collection set for the next pause
       assert(g1_policy()->collection_set() == NULL, "must be");
       g1_policy()->start_incremental_cset_building();
@@ -1913,6 +1928,8 @@
   _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
   _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
   _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
+  _humongous_is_live(),
+  _has_humongous_reclaim_candidates(false),
   _free_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
@@ -2069,6 +2086,7 @@
   _g1h = this;
 
   _in_cset_fast_test.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);
+  _humongous_is_live.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
@@ -2164,6 +2182,11 @@
   }
 }
 
+void G1CollectedHeap::clear_humongous_is_live_table() {
+  guarantee(G1ReclaimDeadHumongousObjectsAtYoungGC, "Should only be called if true");
+  _humongous_is_live.clear();
+}
+
 size_t G1CollectedHeap::conservative_max_heap_alignment() {
   return HeapRegion::max_region_size();
 }
@@ -2580,15 +2603,12 @@
 
 // Iteration functions.
 
-// Iterates an OopClosure over all ref-containing fields of objects
-// within a HeapRegion.
+// Applies an ExtendedOopClosure onto all references of objects within a HeapRegion.
 
 class IterateOopClosureRegionClosure: public HeapRegionClosure {
-  MemRegion _mr;
   ExtendedOopClosure* _cl;
 public:
-  IterateOopClosureRegionClosure(MemRegion mr, ExtendedOopClosure* cl)
-    : _mr(mr), _cl(cl) {}
+  IterateOopClosureRegionClosure(ExtendedOopClosure* cl) : _cl(cl) {}
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
       r->oop_iterate(_cl);
@@ -2598,12 +2618,7 @@
 };
 
 void G1CollectedHeap::oop_iterate(ExtendedOopClosure* cl) {
-  IterateOopClosureRegionClosure blk(_g1_committed, cl);
-  heap_region_iterate(&blk);
-}
-
-void G1CollectedHeap::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  IterateOopClosureRegionClosure blk(mr, cl);
+  IterateOopClosureRegionClosure blk(cl);
   heap_region_iterate(&blk);
 }
 
@@ -2956,10 +2971,17 @@
   }
 }
 
-CompactibleSpace* G1CollectedHeap::first_compactible_space() {
-  return n_regions() > 0 ? region_at(0) : NULL;
-}
-
+HeapRegion* G1CollectedHeap::next_compaction_region(const HeapRegion* from) const {
+  // We're not using an iterator given that it will wrap around when
+  // it reaches the last region and this is not what we want here.
+  for (uint index = from->hrs_index() + 1; index < n_regions(); index++) {
+    HeapRegion* hr = region_at(index);
+    if (!hr->isHumongous()) {
+      return hr;
+    }
+  }
+  return NULL;
+}
 
 Space* G1CollectedHeap::space_containing(const void* addr) const {
   Space* res = heap_region_containing(addr);
@@ -3390,25 +3412,20 @@
 
     if (!silent) { gclog_or_tty->print("Roots "); }
     VerifyRootsClosure rootsCl(vo);
-    G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
-    G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
     VerifyKlassClosure klassCl(this, &rootsCl);
+    CLDToKlassAndOopClosure cldCl(&klassCl, &rootsCl, false);
 
     // We apply the relevant closures to all the oops in the
-    // system dictionary, the string table and the code cache.
-    const int so = SO_AllClasses | SO_Strings | SO_CodeCache;
-
-    // Need cleared claim bits for the strong roots processing
-    ClassLoaderDataGraph::clear_claimed_marks();
-
-    process_strong_roots(true,      // activate StrongRootsScope
-                         false,     // we set "is scavenging" to false,
-                                    // so we don't reset the dirty cards.
-                         ScanningOption(so),  // roots scanning options
-                         &rootsCl,
-                         &blobsCl,
-                         &klassCl
-                         );
+    // system dictionary, class loader data graph, the string table
+    // and the nmethods in the code cache.
+    G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
+    G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
+
+    process_all_roots(true,            // activate StrongRootsScope
+                      SO_AllCodeCache, // roots scanning options
+                      &rootsCl,
+                      &cldCl,
+                      &blobsCl);
 
     bool failures = rootsCl.failures() || codeRootsCl.failures();
 
@@ -3780,6 +3797,61 @@
   return g1_rem_set()->cardsScanned();
 }
 
+bool G1CollectedHeap::humongous_region_is_always_live(uint index) {
+  HeapRegion* region = region_at(index);
+  assert(region->startsHumongous(), "Must start a humongous object");
+  return oop(region->bottom())->is_objArray() || !region->rem_set()->is_empty();
+}
+
+class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
+ private:
+  size_t _total_humongous;
+  size_t _candidate_humongous;
+ public:
+  RegisterHumongousWithInCSetFastTestClosure() : _total_humongous(0), _candidate_humongous(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    uint region_idx = r->hrs_index();
+    bool is_candidate = !g1h->humongous_region_is_always_live(region_idx);
+    // Is_candidate already filters out humongous regions with some remembered set.
+    // This will not lead to humongous object that we mistakenly keep alive because
+    // during young collection the remembered sets will only be added to.
+    if (is_candidate) {
+      g1h->register_humongous_region_with_in_cset_fast_test(region_idx);
+      _candidate_humongous++;
+    }
+    _total_humongous++;
+
+    return false;
+  }
+
+  size_t total_humongous() const { return _total_humongous; }
+  size_t candidate_humongous() const { return _candidate_humongous; }
+};
+
+void G1CollectedHeap::register_humongous_regions_with_in_cset_fast_test() {
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(0, 0);
+    return;
+  }
+
+  RegisterHumongousWithInCSetFastTestClosure cl;
+  heap_region_iterate(&cl);
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(cl.total_humongous(),
+                                                                  cl.candidate_humongous());
+  _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
+
+  if (_has_humongous_reclaim_candidates) {
+    clear_humongous_is_live_table();
+  }
+}
+
 void
 G1CollectedHeap::setup_surviving_young_words() {
   assert(_surviving_young_words == NULL, "pre-condition");
@@ -3990,6 +4062,7 @@
       increment_gc_time_stamp();
 
       verify_before_gc();
+      check_bitmaps("GC Start");
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
 
@@ -4065,6 +4138,8 @@
 
         g1_policy()->finalize_cset(target_pause_time_ms, evacuation_info);
 
+        register_humongous_regions_with_in_cset_fast_test();
+
         _cm->note_start_of_gc();
         // We should not verify the per-thread SATB buffers given that
         // we have not filtered them yet (we'll do so during the
@@ -4115,6 +4190,9 @@
                                  true  /* verify_fingers */);
 
         free_collection_set(g1_policy()->collection_set(), evacuation_info);
+
+        eagerly_reclaim_humongous_regions();
+
         g1_policy()->clear_collection_set();
 
         cleanup_surviving_young_words();
@@ -4235,6 +4313,7 @@
         increment_gc_time_stamp();
 
         verify_after_gc();
+        check_bitmaps("GC End");
 
         assert(!ref_processor_stw()->discovery_enabled(), "Postcondition");
         ref_processor_stw()->verify_no_references_recorded();
@@ -4338,11 +4417,7 @@
   assert(_mutator_alloc_region.get() == NULL, "post-condition");
 }
 
-void G1CollectedHeap::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
-  assert_at_safepoint(true /* should_be_vm_thread */);
-
-  _survivor_gc_alloc_region.init();
-  _old_gc_alloc_region.init();
+void G1CollectedHeap::use_retained_old_gc_alloc_region(EvacuationInfo& evacuation_info) {
   HeapRegion* retained_region = _retained_old_gc_alloc_region;
   _retained_old_gc_alloc_region = NULL;
 
@@ -4360,7 +4435,7 @@
       !(retained_region->top() == retained_region->end()) &&
       !retained_region->is_empty() &&
       !retained_region->isHumongous()) {
-    retained_region->set_saved_mark();
+    retained_region->record_top_and_timestamp();
     // The retained region was added to the old region set when it was
     // retired. We have to remove it now, since we don't allow regions
     // we allocate to in the region sets. We'll re-add it later, when
@@ -4374,6 +4449,15 @@
   }
 }
 
+void G1CollectedHeap::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
+  assert_at_safepoint(true /* should_be_vm_thread */);
+
+  _survivor_gc_alloc_region.init();
+  _old_gc_alloc_region.init();
+
+  use_retained_old_gc_alloc_region(evacuation_info);
+}
+
 void G1CollectedHeap::release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info) {
   evacuation_info.set_allocation_regions(_survivor_gc_alloc_region.count() +
                                          _old_gc_alloc_region.count());
@@ -4607,7 +4691,7 @@
   }
 }
 
-template <G1Barrier barrier, bool do_mark_object>
+template <G1Barrier barrier, G1Mark do_mark_object>
 template <class T>
 void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
@@ -4620,7 +4704,9 @@
 
   assert(_worker_id == _par_scan_state->queue_num(), "sanity");
 
-  if (_g1->in_cset_fast_test(obj)) {
+  G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+
+  if (state == G1CollectedHeap::InCSet) {
     oop forwardee;
     if (obj->is_forwarded()) {
       forwardee = obj->forwardee();
@@ -4629,7 +4715,7 @@
     }
     assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
-    if (do_mark_object && forwardee != obj) {
+    if (do_mark_object != G1MarkNone && forwardee != obj) {
       // If the object is self-forwarded we don't need to explicitly
       // mark it, the evacuation failure protocol will do so.
       mark_forwarded_object(obj, forwardee);
@@ -4639,10 +4725,12 @@
       do_klass_barrier(p, forwardee);
     }
   } else {
+    if (state == G1CollectedHeap::IsHumongous) {
+      _g1->set_humongous_is_live(obj);
+    }
     // The object is not in collection set. If we're a root scanning
-    // closure during an initial mark pause (i.e. do_mark_object will
-    // be true) then attempt to mark the object.
-    if (do_mark_object) {
+    // closure during an initial mark pause then attempt to mark the object.
+    if (do_mark_object == G1MarkFromRoot) {
       mark_object(obj);
     }
   }
@@ -4652,8 +4740,8 @@
   }
 }
 
-template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(oop* p);
-template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(narrowOop* p);
+template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(oop* p);
+template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(narrowOop* p);
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
 protected:
@@ -4732,11 +4820,6 @@
   Mutex _stats_lock;
   Mutex* stats_lock() { return &_stats_lock; }
 
-  size_t getNCards() {
-    return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
-      / G1BlockOffsetSharedArray::N_bytes;
-  }
-
 public:
   G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues)
     : AbstractGangTask("G1 collection"),
@@ -4766,6 +4849,51 @@
     _n_workers = active_workers;
   }
 
+  // Helps out with CLD processing.
+  //
+  // During InitialMark we need to:
+  // 1) Scavenge all CLDs for the young GC.
+  // 2) Mark all objects directly reachable from strong CLDs.
+  template <G1Mark do_mark_object>
+  class G1CLDClosure : public CLDClosure {
+    G1ParCopyClosure<G1BarrierNone,  do_mark_object>* _oop_closure;
+    G1ParCopyClosure<G1BarrierKlass, do_mark_object>  _oop_in_klass_closure;
+    G1KlassScanClosure                                _klass_in_cld_closure;
+    bool                                              _claim;
+
+   public:
+    G1CLDClosure(G1ParCopyClosure<G1BarrierNone, do_mark_object>* oop_closure,
+                 bool only_young, bool claim)
+        : _oop_closure(oop_closure),
+          _oop_in_klass_closure(oop_closure->g1(),
+                                oop_closure->pss(),
+                                oop_closure->rp()),
+          _klass_in_cld_closure(&_oop_in_klass_closure, only_young),
+          _claim(claim) {
+
+    }
+
+    void do_cld(ClassLoaderData* cld) {
+      cld->oops_do(_oop_closure, &_klass_in_cld_closure, _claim);
+    }
+  };
+
+  class G1CodeBlobClosure: public CodeBlobClosure {
+    OopClosure* _f;
+
+   public:
+    G1CodeBlobClosure(OopClosure* f) : _f(f) {}
+    void do_code_blob(CodeBlob* blob) {
+      nmethod* that = blob->as_nmethod_or_null();
+      if (that != NULL) {
+        if (!that->test_set_oops_do_mark()) {
+          that->oops_do(_f);
+          that->fix_oop_relocations();
+        }
+      }
+    }
+  };
+
   void work(uint worker_id) {
     if (worker_id >= _n_workers) return;  // no work needed this round
 
@@ -4783,40 +4911,67 @@
 
       pss.set_evac_failure_closure(&evac_failure_cl);
 
-      G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
-      G1ParScanMetadataClosure       only_scan_metadata_cl(_g1h, &pss, rp);
-
-      G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
-      G1ParScanAndMarkMetadataClosure scan_mark_metadata_cl(_g1h, &pss, rp);
-
-      bool only_young                 = _g1h->g1_policy()->gcs_are_young();
-      G1KlassScanClosure              scan_mark_klasses_cl_s(&scan_mark_metadata_cl, false);
-      G1KlassScanClosure              only_scan_klasses_cl_s(&only_scan_metadata_cl, only_young);
-
-      OopClosure*                    scan_root_cl = &only_scan_root_cl;
-      G1KlassScanClosure*            scan_klasses_cl = &only_scan_klasses_cl_s;
+      bool only_young = _g1h->g1_policy()->gcs_are_young();
+
+      // Non-IM young GC.
+      G1ParCopyClosure<G1BarrierNone, G1MarkNone>             scan_only_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkNone>                                scan_only_cld_cl(&scan_only_root_cl,
+                                                                               only_young, // Only process dirty klasses.
+                                                                               false);     // No need to claim CLDs.
+      // IM young GC.
+      //    Strong roots closures.
+      G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot>         scan_mark_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkFromRoot>                            scan_mark_cld_cl(&scan_mark_root_cl,
+                                                                               false, // Process all klasses.
+                                                                               true); // Need to claim CLDs.
+      //    Weak roots closures.
+      G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkPromotedFromRoot>                    scan_mark_weak_cld_cl(&scan_mark_weak_root_cl,
+                                                                                    false, // Process all klasses.
+                                                                                    true); // Need to claim CLDs.
+
+      G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl);
+      G1CodeBlobClosure scan_mark_code_cl(&scan_mark_root_cl);
+      // IM Weak code roots are handled later.
+
+      OopClosure* strong_root_cl;
+      OopClosure* weak_root_cl;
+      CLDClosure* strong_cld_cl;
+      CLDClosure* weak_cld_cl;
+      CodeBlobClosure* strong_code_cl;
 
       if (_g1h->g1_policy()->during_initial_mark_pause()) {
         // We also need to mark copied objects.
-        scan_root_cl = &scan_mark_root_cl;
-        scan_klasses_cl = &scan_mark_klasses_cl_s;
+        strong_root_cl = &scan_mark_root_cl;
+        strong_cld_cl  = &scan_mark_cld_cl;
+        strong_code_cl = &scan_mark_code_cl;
+        if (ClassUnloadingWithConcurrentMark) {
+          weak_root_cl = &scan_mark_weak_root_cl;
+          weak_cld_cl  = &scan_mark_weak_cld_cl;
+        } else {
+          weak_root_cl = &scan_mark_root_cl;
+          weak_cld_cl  = &scan_mark_cld_cl;
+        }
+      } else {
+        strong_root_cl = &scan_only_root_cl;
+        weak_root_cl   = &scan_only_root_cl;
+        strong_cld_cl  = &scan_only_cld_cl;
+        weak_cld_cl    = &scan_only_cld_cl;
+        strong_code_cl = &scan_only_code_cl;
       }
 
-      G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
-
-      // Don't scan the scavengable methods in the code cache as part
-      // of strong root scanning. The code roots that point into a
-      // region in the collection set are scanned when we scan the
-      // region's RSet.
-      int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings;
+
+      G1ParPushHeapRSClosure  push_heap_rs_cl(_g1h, &pss);
 
       pss.start_strong_roots();
-      _g1h->g1_process_strong_roots(/* is scavenging */ true,
-                                    SharedHeap::ScanningOption(so),
-                                    scan_root_cl,
-                                    &push_heap_rs_cl,
-                                    scan_klasses_cl,
-                                    worker_id);
+      _g1h->g1_process_roots(strong_root_cl,
+                             weak_root_cl,
+                             &push_heap_rs_cl,
+                             strong_cld_cl,
+                             weak_cld_cl,
+                             strong_code_cl,
+                             worker_id);
+
       pss.end_strong_roots();
 
       {
@@ -4854,30 +5009,32 @@
 
 void
 G1CollectedHeap::
-g1_process_strong_roots(bool is_scavenging,
-                        ScanningOption so,
-                        OopClosure* scan_non_heap_roots,
-                        OopsInHeapRegionClosure* scan_rs,
-                        G1KlassScanClosure* scan_klasses,
-                        uint worker_i) {
-
-  // First scan the strong roots
+g1_process_roots(OopClosure* scan_non_heap_roots,
+                 OopClosure* scan_non_heap_weak_roots,
+                 OopsInHeapRegionClosure* scan_rs,
+                 CLDClosure* scan_strong_clds,
+                 CLDClosure* scan_weak_clds,
+                 CodeBlobClosure* scan_strong_code,
+                 uint worker_i) {
+
+  // First scan the shared roots.
   double ext_roots_start = os::elapsedTime();
   double closure_app_time_sec = 0.0;
 
+  bool during_im = _g1h->g1_policy()->during_initial_mark_pause();
+  bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark;
+
   BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
-
-  assert(so & SO_CodeCache || scan_rs != NULL, "must scan code roots somehow");
-  // Walk the code cache/strong code roots w/o buffering, because StarTask
-  // cannot handle unaligned oop locations.
-  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, true /* do_marking */);
-
-  process_strong_roots(false, // no scoping; this is parallel code
-                       is_scavenging, so,
-                       &buf_scan_non_heap_roots,
-                       &eager_scan_code_roots,
-                       scan_klasses
-                       );
+  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
+
+  process_roots(false, // no scoping; this is parallel code
+                SharedHeap::SO_None,
+                &buf_scan_non_heap_roots,
+                &buf_scan_non_heap_weak_roots,
+                scan_strong_clds,
+                // Unloading Initial Marks handle the weak CLDs separately.
+                (trace_metadata ? NULL : scan_weak_clds),
+                scan_strong_code);
 
   // Now the CM ref_processor roots.
   if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
@@ -4888,10 +5045,21 @@
     ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
   }
 
+  if (trace_metadata) {
+    // Barrier to make sure all workers passed
+    // the strong CLD and strong nmethods phases.
+    active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads());
+
+    // Now take the complement of the strong CLDs.
+    ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
+  }
+
   // Finish up any enqueued closure apps (attributed as object copy time).
   buf_scan_non_heap_roots.done();
-
-  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds();
+  buf_scan_non_heap_weak_roots.done();
+
+  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
+      + buf_scan_non_heap_weak_roots.closure_app_seconds();
 
   g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
 
@@ -4915,32 +5083,14 @@
   }
   g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
-  // If this is an initial mark pause, and we're not scanning
-  // the entire code cache, we need to mark the oops in the
-  // strong code root lists for the regions that are not in
-  // the collection set.
-  // Note all threads participate in this set of root tasks.
-  double mark_strong_code_roots_ms = 0.0;
-  if (g1_policy()->during_initial_mark_pause() && !(so & SO_CodeCache)) {
-    double mark_strong_roots_start = os::elapsedTime();
-    mark_strong_code_roots(worker_i);
-    mark_strong_code_roots_ms = (os::elapsedTime() - mark_strong_roots_start) * 1000.0;
-  }
-  g1_policy()->phase_times()->record_strong_code_root_mark_time(worker_i, mark_strong_code_roots_ms);
-
   // Now scan the complement of the collection set.
-  if (scan_rs != NULL) {
-    g1_rem_set()->oops_into_collection_set_do(scan_rs, &eager_scan_code_roots, worker_i);
-  }
+  MarkingCodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots, CodeBlobToOopClosure::FixRelocations);
+
+  g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
+
   _process_strong_tasks->all_tasks_completed();
 }
 
-void
-G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure) {
-  CodeBlobToOopClosure roots_in_blobs(root_closure, /*do_marking=*/ false);
-  SharedHeap::process_weak_roots(root_closure, &roots_in_blobs);
-}
-
 class G1StringSymbolTableUnlinkTask : public AbstractGangTask {
 private:
   BoolObjectClosure* _is_alive;
@@ -4958,7 +5108,8 @@
   bool _do_in_parallel;
 public:
   G1StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) :
-    AbstractGangTask("Par String/Symbol table unlink"), _is_alive(is_alive),
+    AbstractGangTask("String/Symbol Unlinking"),
+    _is_alive(is_alive),
     _do_in_parallel(G1CollectedHeap::use_parallel_gc_threads()),
     _process_strings(process_strings), _strings_processed(0), _strings_removed(0),
     _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) {
@@ -4980,6 +5131,14 @@
     guarantee(!_process_symbols || !_do_in_parallel || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size,
               err_msg("claim value "INT32_FORMAT" after unlink less than initial symbol table size "INT32_FORMAT,
                       SymbolTable::parallel_claimed_index(), _initial_symbol_table_size));
+
+    if (G1TraceStringSymbolTableScrubbing) {
+      gclog_or_tty->print_cr("Cleaned string and symbol table, "
+                             "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, "
+                             "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed",
+                             strings_processed(), strings_removed(),
+                             symbols_processed(), symbols_removed());
+    }
   }
 
   void work(uint worker_id) {
@@ -5015,12 +5174,279 @@
   size_t symbols_removed()   const { return (size_t)_symbols_removed; }
 };
 
-void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
-                                                     bool process_strings, bool process_symbols) {
+class G1CodeCacheUnloadingTask VALUE_OBJ_CLASS_SPEC {
+private:
+  static Monitor* _lock;
+
+  BoolObjectClosure* const _is_alive;
+  const bool               _unloading_occurred;
+  const uint               _num_workers;
+
+  // Variables used to claim nmethods.
+  nmethod* _first_nmethod;
+  volatile nmethod* _claimed_nmethod;
+
+  // The list of nmethods that need to be processed by the second pass.
+  volatile nmethod* _postponed_list;
+  volatile uint     _num_entered_barrier;
+
+ public:
+  G1CodeCacheUnloadingTask(uint num_workers, BoolObjectClosure* is_alive, bool unloading_occurred) :
+      _is_alive(is_alive),
+      _unloading_occurred(unloading_occurred),
+      _num_workers(num_workers),
+      _first_nmethod(NULL),
+      _claimed_nmethod(NULL),
+      _postponed_list(NULL),
+      _num_entered_barrier(0)
+  {
+    nmethod::increase_unloading_clock();
+    _first_nmethod = CodeCache::alive_nmethod(CodeCache::first());
+    _claimed_nmethod = (volatile nmethod*)_first_nmethod;
+  }
+
+  ~G1CodeCacheUnloadingTask() {
+    CodeCache::verify_clean_inline_caches();
+
+    CodeCache::set_needs_cache_clean(false);
+    guarantee(CodeCache::scavenge_root_nmethods() == NULL, "Must be");
+
+    CodeCache::verify_icholder_relocations();
+  }
+
+ private:
+  void add_to_postponed_list(nmethod* nm) {
+      nmethod* old;
+      do {
+        old = (nmethod*)_postponed_list;
+        nm->set_unloading_next(old);
+      } while ((nmethod*)Atomic::cmpxchg_ptr(nm, &_postponed_list, old) != old);
+  }
+
+  void clean_nmethod(nmethod* nm) {
+    bool postponed = nm->do_unloading_parallel(_is_alive, _unloading_occurred);
+
+    if (postponed) {
+      // This nmethod referred to an nmethod that has not been cleaned/unloaded yet.
+      add_to_postponed_list(nm);
+    }
+
+    // Mark that this thread has been cleaned/unloaded.
+    // After this call, it will be safe to ask if this nmethod was unloaded or not.
+    nm->set_unloading_clock(nmethod::global_unloading_clock());
+  }
+
+  void clean_nmethod_postponed(nmethod* nm) {
+    nm->do_unloading_parallel_postponed(_is_alive, _unloading_occurred);
+  }
+
+  static const int MaxClaimNmethods = 16;
+
+  void claim_nmethods(nmethod** claimed_nmethods, int *num_claimed_nmethods) {
+    nmethod* first;
+    nmethod* last;
+
+    do {
+      *num_claimed_nmethods = 0;
+
+      first = last = (nmethod*)_claimed_nmethod;
+
+      if (first != NULL) {
+        for (int i = 0; i < MaxClaimNmethods; i++) {
+          last = CodeCache::alive_nmethod(CodeCache::next(last));
+
+          if (last == NULL) {
+            break;
+          }
+
+          claimed_nmethods[i] = last;
+          (*num_claimed_nmethods)++;
+        }
+      }
+
+    } while ((nmethod*)Atomic::cmpxchg_ptr(last, &_claimed_nmethod, first) != first);
+  }
+
+  nmethod* claim_postponed_nmethod() {
+    nmethod* claim;
+    nmethod* next;
+
+    do {
+      claim = (nmethod*)_postponed_list;
+      if (claim == NULL) {
+        return NULL;
+      }
+
+      next = claim->unloading_next();
+
+    } while ((nmethod*)Atomic::cmpxchg_ptr(next, &_postponed_list, claim) != claim);
+
+    return claim;
+  }
+
+ public:
+  // Mark that we're done with the first pass of nmethod cleaning.
+  void barrier_mark(uint worker_id) {
+    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+    _num_entered_barrier++;
+    if (_num_entered_barrier == _num_workers) {
+      ml.notify_all();
+    }
+  }
+
+  // See if we have to wait for the other workers to
+  // finish their first-pass nmethod cleaning work.
+  void barrier_wait(uint worker_id) {
+    if (_num_entered_barrier < _num_workers) {
+      MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+      while (_num_entered_barrier < _num_workers) {
+          ml.wait(Mutex::_no_safepoint_check_flag, 0, false);
+      }
+    }
+  }
+
+  // Cleaning and unloading of nmethods. Some work has to be postponed
+  // to the second pass, when we know which nmethods survive.
+  void work_first_pass(uint worker_id) {
+    // The first nmethods is claimed by the first worker.
+    if (worker_id == 0 && _first_nmethod != NULL) {
+      clean_nmethod(_first_nmethod);
+      _first_nmethod = NULL;
+    }
+
+    int num_claimed_nmethods;
+    nmethod* claimed_nmethods[MaxClaimNmethods];
+
+    while (true) {
+      claim_nmethods(claimed_nmethods, &num_claimed_nmethods);
+
+      if (num_claimed_nmethods == 0) {
+        break;
+      }
+
+      for (int i = 0; i < num_claimed_nmethods; i++) {
+        clean_nmethod(claimed_nmethods[i]);
+      }
+    }
+  }
+
+  void work_second_pass(uint worker_id) {
+    nmethod* nm;
+    // Take care of postponed nmethods.
+    while ((nm = claim_postponed_nmethod()) != NULL) {
+      clean_nmethod_postponed(nm);
+    }
+  }
+};
+
+Monitor* G1CodeCacheUnloadingTask::_lock = new Monitor(Mutex::leaf, "Code Cache Unload lock");
+
+class G1KlassCleaningTask : public StackObj {
+  BoolObjectClosure*                      _is_alive;
+  volatile jint                           _clean_klass_tree_claimed;
+  ClassLoaderDataGraphKlassIteratorAtomic _klass_iterator;
+
+ public:
+  G1KlassCleaningTask(BoolObjectClosure* is_alive) :
+      _is_alive(is_alive),
+      _clean_klass_tree_claimed(0),
+      _klass_iterator() {
+  }
+
+ private:
+  bool claim_clean_klass_tree_task() {
+    if (_clean_klass_tree_claimed) {
+      return false;
+    }
+
+    return Atomic::cmpxchg(1, (jint*)&_clean_klass_tree_claimed, 0) == 0;
+  }
+
+  InstanceKlass* claim_next_klass() {
+    Klass* klass;
+    do {
+      klass =_klass_iterator.next_klass();
+    } while (klass != NULL && !klass->oop_is_instance());
+
+    return (InstanceKlass*)klass;
+  }
+
+public:
+
+  void clean_klass(InstanceKlass* ik) {
+    ik->clean_implementors_list(_is_alive);
+    ik->clean_method_data(_is_alive);
+
+    // G1 specific cleanup work that has
+    // been moved here to be done in parallel.
+    ik->clean_dependent_nmethods();
+  }
+
+  void work() {
+    ResourceMark rm;
+
+    // One worker will clean the subklass/sibling klass tree.
+    if (claim_clean_klass_tree_task()) {
+      Klass::clean_subklass_tree(_is_alive);
+    }
+
+    // All workers will help cleaning the classes,
+    InstanceKlass* klass;
+    while ((klass = claim_next_klass()) != NULL) {
+      clean_klass(klass);
+    }
+  }
+};
+
+// To minimize the remark pause times, the tasks below are done in parallel.
+class G1ParallelCleaningTask : public AbstractGangTask {
+private:
+  G1StringSymbolTableUnlinkTask _string_symbol_task;
+  G1CodeCacheUnloadingTask      _code_cache_task;
+  G1KlassCleaningTask           _klass_cleaning_task;
+
+public:
+  // The constructor is run in the VMThread.
+  G1ParallelCleaningTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, uint num_workers, bool unloading_occurred) :
+      AbstractGangTask("Parallel Cleaning"),
+      _string_symbol_task(is_alive, process_strings, process_symbols),
+      _code_cache_task(num_workers, is_alive, unloading_occurred),
+      _klass_cleaning_task(is_alive) {
+  }
+
+  // The parallel work done by all worker threads.
+  void work(uint worker_id) {
+    // Do first pass of code cache cleaning.
+    _code_cache_task.work_first_pass(worker_id);
+
+    // Let the threads mark that the first pass is done.
+    _code_cache_task.barrier_mark(worker_id);
+
+    // Clean the Strings and Symbols.
+    _string_symbol_task.work(worker_id);
+
+    // Wait for all workers to finish the first code cache cleaning pass.
+    _code_cache_task.barrier_wait(worker_id);
+
+    // Do the second code cache cleaning work, which realize on
+    // the liveness information gathered during the first pass.
+    _code_cache_task.work_second_pass(worker_id);
+
+    // Clean all klasses that were not unloaded.
+    _klass_cleaning_task.work();
+  }
+};
+
+
+void G1CollectedHeap::parallel_cleaning(BoolObjectClosure* is_alive,
+                                        bool process_strings,
+                                        bool process_symbols,
+                                        bool class_unloading_occurred) {
   uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                   _g1h->workers()->active_workers() : 1);
-
-  G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
+                    workers()->active_workers() : 1);
+
+  G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols,
+                                        n_workers, class_unloading_occurred);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     set_par_threads(n_workers);
     workers()->run_task(&g1_unlink_task);
@@ -5028,12 +5454,21 @@
   } else {
     g1_unlink_task.work(0);
   }
-  if (G1TraceStringSymbolTableScrubbing) {
-    gclog_or_tty->print_cr("Cleaned string and symbol table, "
-                           "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, "
-                           "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed",
-                           g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(),
-                           g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed());
+}
+
+void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
+                                                     bool process_strings, bool process_symbols) {
+  {
+    uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                     _g1h->workers()->active_workers() : 1);
+    G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      set_par_threads(n_workers);
+      workers()->run_task(&g1_unlink_task);
+      set_par_threads(0);
+    } else {
+      g1_unlink_task.work(0);
+    }
   }
 
   if (G1StringDedup::is_enabled()) {
@@ -5117,12 +5552,21 @@
 public:
   G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
   void do_oop(narrowOop* p) { guarantee(false, "Not needed"); }
-  void do_oop(      oop* p) {
+  void do_oop(oop* p) {
     oop obj = *p;
 
-    if (_g1->obj_in_cs(obj)) {
+    G1CollectedHeap::in_cset_state_t cset_state = _g1->in_cset_state(obj);
+    if (obj == NULL || cset_state == G1CollectedHeap::InNeither) {
+      return;
+    }
+    if (cset_state == G1CollectedHeap::InCSet) {
       assert( obj->is_forwarded(), "invariant" );
       *p = obj->forwardee();
+    } else {
+      assert(!obj->is_forwarded(), "invariant" );
+      assert(cset_state == G1CollectedHeap::IsHumongous,
+             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state));
+      _g1->set_humongous_is_live(obj);
     }
   }
 };
@@ -5135,17 +5579,14 @@
 class G1CopyingKeepAliveClosure: public OopClosure {
   G1CollectedHeap*         _g1h;
   OopClosure*              _copy_non_heap_obj_cl;
-  OopsInHeapRegionClosure* _copy_metadata_obj_cl;
   G1ParScanThreadState*    _par_scan_state;
 
 public:
   G1CopyingKeepAliveClosure(G1CollectedHeap* g1h,
                             OopClosure* non_heap_obj_cl,
-                            OopsInHeapRegionClosure* metadata_obj_cl,
                             G1ParScanThreadState* pss):
     _g1h(g1h),
     _copy_non_heap_obj_cl(non_heap_obj_cl),
-    _copy_metadata_obj_cl(metadata_obj_cl),
     _par_scan_state(pss)
   {}
 
@@ -5155,7 +5596,7 @@
   template <class T> void do_oop_work(T* p) {
     oop obj = oopDesc::load_decode_heap_oop(p);
 
-    if (_g1h->obj_in_cs(obj)) {
+    if (_g1h->is_in_cset_or_humongous(obj)) {
       // If the referent object has been forwarded (either copied
       // to a new location or to itself in the event of an
       // evacuation failure) then we need to update the reference
@@ -5178,12 +5619,12 @@
         _par_scan_state->push_on_queue(p);
       } else {
         assert(!Metaspace::contains((const void*)p),
-               err_msg("Otherwise need to call _copy_metadata_obj_cl->do_oop(p) "
+               err_msg("Unexpectedly found a pointer from metadata: "
                               PTR_FORMAT, p));
-          _copy_non_heap_obj_cl->do_oop(p);
-        }
+        _copy_non_heap_obj_cl->do_oop(p);
       }
     }
+  }
 };
 
 // Serial drain queue closure. Called as the 'complete_gc'
@@ -5273,22 +5714,18 @@
     pss.set_evac_failure_closure(&evac_failure_cl);
 
     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
 
     G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(_g1h, &pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-    OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.
       copy_non_heap_cl = &copy_mark_non_heap_cl;
-      copy_metadata_cl = &copy_mark_metadata_cl;
     }
 
     // Keep alive closure.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_metadata_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
 
     // Complete GC closure
     G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator);
@@ -5382,18 +5819,14 @@
     assert(pss.queue_is_empty(), "both queue and overflow should be empty");
 
     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
 
     G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(_g1h, &pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-    OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.
       copy_non_heap_cl = &copy_mark_non_heap_cl;
-      copy_metadata_cl = &copy_mark_metadata_cl;
     }
 
     // Is alive closure
@@ -5401,7 +5834,7 @@
 
     // Copying keep alive closure. Applied to referent objects that need
     // to be copied.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_metadata_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
 
     ReferenceProcessor* rp = _g1h->ref_processor_cm();
 
@@ -5507,22 +5940,18 @@
   assert(pss.queue_is_empty(), "pre-condition");
 
   G1ParScanExtRootClosure        only_copy_non_heap_cl(this, &pss, NULL);
-  G1ParScanMetadataClosure       only_copy_metadata_cl(this, &pss, NULL);
 
   G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL);
-  G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(this, &pss, NULL);
 
   OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-  OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
   if (_g1h->g1_policy()->during_initial_mark_pause()) {
     // We also need to mark copied objects.
     copy_non_heap_cl = &copy_mark_non_heap_cl;
-    copy_metadata_cl = &copy_mark_metadata_cl;
   }
 
   // Keep alive closure.
-  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, copy_metadata_cl, &pss);
+  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, &pss);
 
   // Serial Complete GC closure
   G1STWDrainQueueClosure drain_queue(this, &pss);
@@ -5641,6 +6070,10 @@
 
   {
     StrongRootsScope srs(this);
+    // InitialMark needs claim bits to keep track of the marked-through CLDs.
+    if (g1_policy()->during_initial_mark_pause()) {
+      ClassLoaderDataGraph::clear_claimed_marks();
+    }
 
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       // The individual threads will set their evac-failure closures.
@@ -5745,6 +6178,11 @@
   assert(!hr->is_empty(), "the region should not be empty");
   assert(free_list != NULL, "pre-condition");
 
+  if (G1VerifyBitmaps) {
+    MemRegion mr(hr->bottom(), hr->end());
+    concurrent_mark()->clearRangePrevBitmap(mr);
+  }
+
   // Clear the card counts for this region.
   // Note: we only need to do this if the region is not young
   // (since we don't refine cards in young regions).
@@ -5879,7 +6317,87 @@
 void G1CollectedHeap::verify_dirty_young_regions() {
   verify_dirty_young_list(_young_list->first_region());
 }
-#endif
+
+bool G1CollectedHeap::verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+                                               HeapWord* tams, HeapWord* end) {
+  guarantee(tams <= end,
+            err_msg("tams: "PTR_FORMAT" end: "PTR_FORMAT, tams, end));
+  HeapWord* result = bitmap->getNextMarkedWordAddress(tams, end);
+  if (result < end) {
+    gclog_or_tty->cr();
+    gclog_or_tty->print_cr("## wrong marked address on %s bitmap: "PTR_FORMAT,
+                           bitmap_name, result);
+    gclog_or_tty->print_cr("## %s tams: "PTR_FORMAT" end: "PTR_FORMAT,
+                           bitmap_name, tams, end);
+    return false;
+  }
+  return true;
+}
+
+bool G1CollectedHeap::verify_bitmaps(const char* caller, HeapRegion* hr) {
+  CMBitMapRO* prev_bitmap = concurrent_mark()->prevMarkBitMap();
+  CMBitMapRO* next_bitmap = (CMBitMapRO*) concurrent_mark()->nextMarkBitMap();
+
+  HeapWord* bottom = hr->bottom();
+  HeapWord* ptams  = hr->prev_top_at_mark_start();
+  HeapWord* ntams  = hr->next_top_at_mark_start();
+  HeapWord* end    = hr->end();
+
+  bool res_p = verify_no_bits_over_tams("prev", prev_bitmap, ptams, end);
+
+  bool res_n = true;
+  // We reset mark_in_progress() before we reset _cmThread->in_progress() and in this window
+  // we do the clearing of the next bitmap concurrently. Thus, we can not verify the bitmap
+  // if we happen to be in that state.
+  if (mark_in_progress() || !_cmThread->in_progress()) {
+    res_n = verify_no_bits_over_tams("next", next_bitmap, ntams, end);
+  }
+  if (!res_p || !res_n) {
+    gclog_or_tty->print_cr("#### Bitmap verification failed for "HR_FORMAT,
+                           HR_FORMAT_PARAMS(hr));
+    gclog_or_tty->print_cr("#### Caller: %s", caller);
+    return false;
+  }
+  return true;
+}
+
+void G1CollectedHeap::check_bitmaps(const char* caller, HeapRegion* hr) {
+  if (!G1VerifyBitmaps) return;
+
+  guarantee(verify_bitmaps(caller, hr), "bitmap verification");
+}
+
+class G1VerifyBitmapClosure : public HeapRegionClosure {
+private:
+  const char* _caller;
+  G1CollectedHeap* _g1h;
+  bool _failures;
+
+public:
+  G1VerifyBitmapClosure(const char* caller, G1CollectedHeap* g1h) :
+    _caller(caller), _g1h(g1h), _failures(false) { }
+
+  bool failures() { return _failures; }
+
+  virtual bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) return false;
+
+    bool result = _g1h->verify_bitmaps(_caller, hr);
+    if (!result) {
+      _failures = true;
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::check_bitmaps(const char* caller) {
+  if (!G1VerifyBitmaps) return;
+
+  G1VerifyBitmapClosure cl(caller, this);
+  heap_region_iterate(&cl);
+  guarantee(!cl.failures(), "bitmap verification");
+}
+#endif // PRODUCT
 
 void G1CollectedHeap::cleanUpCardTable() {
   G1SATBCardTableModRefBS* ct_bs = g1_barrier_set();
@@ -6028,6 +6546,154 @@
   policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }
 
+class G1FreeHumongousRegionClosure : public HeapRegionClosure {
+ private:
+  FreeRegionList* _free_region_list;
+  HeapRegionSet* _proxy_set;
+  HeapRegionSetCount _humongous_regions_removed;
+  size_t _freed_bytes;
+ public:
+
+  G1FreeHumongousRegionClosure(FreeRegionList* free_region_list) :
+    _free_region_list(free_region_list), _humongous_regions_removed(), _freed_bytes(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    oop obj = (oop)r->bottom();
+    CMBitMap* next_bitmap = g1h->concurrent_mark()->nextMarkBitMap();
+
+    // The following checks whether the humongous object is live are sufficient.
+    // The main additional check (in addition to having a reference from the roots
+    // or the young gen) is whether the humongous object has a remembered set entry.
+    //
+    // A humongous object cannot be live if there is no remembered set for it
+    // because:
+    // - there can be no references from within humongous starts regions referencing
+    // the object because we never allocate other objects into them.
+    // (I.e. there are no intra-region references that may be missed by the
+    // remembered set)
+    // - as soon there is a remembered set entry to the humongous starts region
+    // (i.e. it has "escaped" to an old object) this remembered set entry will stay
+    // until the end of a concurrent mark.
+    //
+    // It is not required to check whether the object has been found dead by marking
+    // or not, in fact it would prevent reclamation within a concurrent cycle, as
+    // all objects allocated during that time are considered live.
+    // SATB marking is even more conservative than the remembered set.
+    // So if at this point in the collection there is no remembered set entry,
+    // nobody has a reference to it.
+    // At the start of collection we flush all refinement logs, and remembered sets
+    // are completely up-to-date wrt to references to the humongous object.
+    //
+    // Other implementation considerations:
+    // - never consider object arrays: while they are a valid target, they have not
+    // been observed to be used as temporary objects.
+    // - they would also pose considerable effort for cleaning up the the remembered
+    // sets.
+    // While this cleanup is not strictly necessary to be done (or done instantly),
+    // given that their occurrence is very low, this saves us this additional
+    // complexity.
+    uint region_idx = r->hrs_index();
+    if (g1h->humongous_is_live(region_idx) ||
+        g1h->humongous_region_is_always_live(region_idx)) {
+
+      if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+        gclog_or_tty->print_cr("Live humongous %d region %d with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                               r->isHumongous(),
+                               region_idx,
+                               r->rem_set()->occupied(),
+                               r->rem_set()->strong_code_roots_list_length(),
+                               next_bitmap->isMarked(r->bottom()),
+                               g1h->humongous_is_live(region_idx),
+                               obj->is_objArray()
+                              );
+      }
+
+      return false;
+    }
+
+    guarantee(!obj->is_objArray(),
+              err_msg("Eagerly reclaiming object arrays is not supported, but the object "PTR_FORMAT" is.",
+                      r->bottom()));
+
+    if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+      gclog_or_tty->print_cr("Reclaim humongous region %d start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                             r->isHumongous(),
+                             r->bottom(),
+                             region_idx,
+                             r->region_num(),
+                             r->rem_set()->occupied(),
+                             r->rem_set()->strong_code_roots_list_length(),
+                             next_bitmap->isMarked(r->bottom()),
+                             g1h->humongous_is_live(region_idx),
+                             obj->is_objArray()
+                            );
+    }
+    // Need to clear mark bit of the humongous object if already set.
+    if (next_bitmap->isMarked(r->bottom())) {
+      next_bitmap->clear(r->bottom());
+    }
+    _freed_bytes += r->used();
+    r->set_containing_set(NULL);
+    _humongous_regions_removed.increment(1u, r->capacity());
+    g1h->free_humongous_region(r, _free_region_list, false);
+
+    return false;
+  }
+
+  HeapRegionSetCount& humongous_free_count() {
+    return _humongous_regions_removed;
+  }
+
+  size_t bytes_freed() const {
+    return _freed_bytes;
+  }
+
+  size_t humongous_reclaimed() const {
+    return _humongous_regions_removed.length();
+  }
+};
+
+void G1CollectedHeap::eagerly_reclaim_humongous_regions() {
+  assert_at_safepoint(true);
+
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC || !_has_humongous_reclaim_candidates) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms(0.0, 0);
+    return;
+  }
+
+  double start_time = os::elapsedTime();
+
+  FreeRegionList local_cleanup_list("Local Humongous Cleanup List");
+
+  G1FreeHumongousRegionClosure cl(&local_cleanup_list);
+  heap_region_iterate(&cl);
+
+  HeapRegionSetCount empty_set;
+  remove_from_old_sets(empty_set, cl.humongous_free_count());
+
+  G1HRPrinter* hr_printer = _g1h->hr_printer();
+  if (hr_printer->is_active()) {
+    FreeRegionListIterator iter(&local_cleanup_list);
+    while (iter.more_available()) {
+      HeapRegion* hr = iter.get_next();
+      hr_printer->cleanup(hr);
+    }
+  }
+
+  prepend_to_freelist(&local_cleanup_list);
+  decrement_summary_bytes(cl.bytes_freed());
+
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms((os::elapsedTime() - start_time) * 1000.0,
+                                                                    cl.humongous_reclaimed());
+}
+
 // This routine is similar to the above but does not record
 // any policy statistics or update free lists; we are abandoning
 // the current incremental collection set in preparation of a
@@ -6268,6 +6934,7 @@
     if (new_alloc_region != NULL) {
       set_region_short_lived_locked(new_alloc_region);
       _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full);
+      check_bitmaps("Mutator Region Allocation", new_alloc_region);
       return new_alloc_region;
     }
   }
@@ -6330,12 +6997,14 @@
       // We really only need to do this for old regions given that we
       // should never scan survivors. But it doesn't hurt to do it
       // for survivors too.
-      new_alloc_region->set_saved_mark();
+      new_alloc_region->record_top_and_timestamp();
       if (survivor) {
         new_alloc_region->set_survivor();
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor);
+        check_bitmaps("Survivor Region Allocation", new_alloc_region);
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
+        check_bitmaps("Old Region Allocation", new_alloc_region);
       }
       bool during_im = g1_policy()->during_initial_mark_pause();
       new_alloc_region->note_start_of_copying(during_im);
@@ -6592,106 +7261,6 @@
   g1_policy()->phase_times()->record_strong_code_root_purge_time(purge_time_ms);
 }
 
-// Mark all the code roots that point into regions *not* in the
-// collection set.
-//
-// Note we do not want to use a "marking" CodeBlobToOopClosure while
-// walking the the code roots lists of regions not in the collection
-// set. Suppose we have an nmethod (M) that points to objects in two
-// separate regions - one in the collection set (R1) and one not (R2).
-// Using a "marking" CodeBlobToOopClosure here would result in "marking"
-// nmethod M when walking the code roots for R1. When we come to scan
-// the code roots for R2, we would see that M is already marked and it
-// would be skipped and the objects in R2 that are referenced from M
-// would not be evacuated.
-
-class MarkStrongCodeRootCodeBlobClosure: public CodeBlobClosure {
-
-  class MarkStrongCodeRootOopClosure: public OopClosure {
-    ConcurrentMark* _cm;
-    HeapRegion* _hr;
-    uint _worker_id;
-
-    template <class T> void do_oop_work(T* p) {
-      T heap_oop = oopDesc::load_heap_oop(p);
-      if (!oopDesc::is_null(heap_oop)) {
-        oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-        // Only mark objects in the region (which is assumed
-        // to be not in the collection set).
-        if (_hr->is_in(obj)) {
-          _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
-        }
-      }
-    }
-
-  public:
-    MarkStrongCodeRootOopClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id) :
-      _cm(cm), _hr(hr), _worker_id(worker_id) {
-      assert(!_hr->in_collection_set(), "sanity");
-    }
-
-    void do_oop(narrowOop* p) { do_oop_work(p); }
-    void do_oop(oop* p)       { do_oop_work(p); }
-  };
-
-  MarkStrongCodeRootOopClosure _oop_cl;
-
-public:
-  MarkStrongCodeRootCodeBlobClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id):
-    _oop_cl(cm, hr, worker_id) {}
-
-  void do_code_blob(CodeBlob* cb) {
-    nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null();
-    if (nm != NULL) {
-      nm->oops_do(&_oop_cl);
-    }
-  }
-};
-
-class MarkStrongCodeRootsHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  uint _worker_id;
-
-public:
-  MarkStrongCodeRootsHRClosure(G1CollectedHeap* g1h, uint worker_id) :
-    _g1h(g1h), _worker_id(worker_id) {}
-
-  bool doHeapRegion(HeapRegion *hr) {
-    HeapRegionRemSet* hrrs = hr->rem_set();
-    if (hr->continuesHumongous()) {
-      // Code roots should never be attached to a continuation of a humongous region
-      assert(hrrs->strong_code_roots_list_length() == 0,
-             err_msg("code roots should never be attached to continuations of humongous region "HR_FORMAT
-                     " starting at "HR_FORMAT", but has "SIZE_FORMAT,
-                     HR_FORMAT_PARAMS(hr), HR_FORMAT_PARAMS(hr->humongous_start_region()),
-                     hrrs->strong_code_roots_list_length()));
-      return false;
-    }
-
-    if (hr->in_collection_set()) {
-      // Don't mark code roots into regions in the collection set here.
-      // They will be marked when we scan them.
-      return false;
-    }
-
-    MarkStrongCodeRootCodeBlobClosure cb_cl(_g1h->concurrent_mark(), hr, _worker_id);
-    hr->strong_code_roots_do(&cb_cl);
-    return false;
-  }
-};
-
-void G1CollectedHeap::mark_strong_code_roots(uint worker_id) {
-  MarkStrongCodeRootsHRClosure cl(this, worker_id);
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    heap_region_par_iterate_chunked(&cl,
-                                    worker_id,
-                                    workers()->active_workers(),
-                                    HeapRegion::ParMarkRootClaimValue);
-  } else {
-    heap_region_iterate(&cl);
-  }
-}
-
 class RebuildStrongCodeRootClosure: public CodeBlobClosure {
   G1CollectedHeap* _g1h;
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -197,19 +197,10 @@
   bool do_object_b(oop p);
 };
 
-// Instances of this class are used for quick tests on whether a reference points
-// into the collection set. Each of the array's elements denotes whether the
-// corresponding region is in the collection set.
-class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<bool> {
- protected:
-  bool default_value() const { return false; }
- public:
-  void clear() { G1BiasedMappedArray<bool>::clear(); }
-};
-
 class RefineCardTableEntryClosure;
 
 class G1CollectedHeap : public SharedHeap {
+  friend class VM_CollectForMetadataAllocation;
   friend class VM_G1CollectForAllocation;
   friend class VM_G1CollectFull;
   friend class VM_G1IncCollectionPause;
@@ -219,7 +210,7 @@
   friend class OldGCAllocRegion;
 
   // Closures used in implementation.
-  template <G1Barrier barrier, bool do_mark_object>
+  template <G1Barrier barrier, G1Mark do_mark_object>
   friend class G1ParCopyClosure;
   friend class G1IsAliveClosure;
   friend class G1EvacuateFollowersClosure;
@@ -236,6 +227,7 @@
   friend class EvacPopObjClosure;
   friend class G1ParCleanupCTTask;
 
+  friend class G1FreeHumongousRegionClosure;
   // Other related classes.
   friend class G1MarkSweep;
 
@@ -266,6 +258,9 @@
   // It keeps track of the humongous regions.
   HeapRegionSet _humongous_set;
 
+  void clear_humongous_is_live_table();
+  void eagerly_reclaim_humongous_regions();
+
   // The number of regions we could create by expansion.
   uint _expansion_regions;
 
@@ -346,6 +341,9 @@
   // It initializes the GC alloc regions at the start of a GC.
   void init_gc_alloc_regions(EvacuationInfo& evacuation_info);
 
+  // Setup the retained old gc alloc region as the currrent old gc alloc region.
+  void use_retained_old_gc_alloc_region(EvacuationInfo& evacuation_info);
+
   // It releases the GC alloc regions at the end of a GC.
   void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info);
 
@@ -363,10 +361,25 @@
   // than the current allocation region.
   size_t _summary_bytes_used;
 
-  // This array is used for a quick test on whether a reference points into
-  // the collection set or not. Each of the array's elements denotes whether the
-  // corresponding region is in the collection set or not.
-  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+  // Records whether the region at the given index is kept live by roots or
+  // references from the young generation.
+  class HumongousIsLiveBiasedMappedArray : public G1BiasedMappedArray<bool> {
+   protected:
+    bool default_value() const { return false; }
+   public:
+    void clear() { G1BiasedMappedArray<bool>::clear(); }
+    void set_live(uint region) {
+      set_by_index(region, true);
+    }
+    bool is_live(uint region) {
+      return get_by_index(region);
+    }
+  };
+
+  HumongousIsLiveBiasedMappedArray _humongous_is_live;
+  // Stores whether during humongous object registration we found candidate regions.
+  // If not, we can skip a few steps.
+  bool _has_humongous_reclaim_candidates;
 
   volatile unsigned _gc_time_stamp;
 
@@ -686,10 +699,24 @@
   virtual void gc_prologue(bool full);
   virtual void gc_epilogue(bool full);
 
+  inline void set_humongous_is_live(oop obj);
+
+  bool humongous_is_live(uint region) {
+    return _humongous_is_live.is_live(region);
+  }
+
+  // Returns whether the given region (which must be a humongous (start) region)
+  // is to be considered conservatively live regardless of any other conditions.
+  bool humongous_region_is_always_live(uint index);
+  // Register the given region to be part of the collection set.
+  inline void register_humongous_region_with_in_cset_fast_test(uint index);
+  // Register regions with humongous objects (actually on the start region) in
+  // the in_cset_fast_test table.
+  void register_humongous_regions_with_in_cset_fast_test();
   // We register a region with the fast "in collection set" test. We
   // simply set to true the array slot corresponding to this region.
   void register_region_with_in_cset_fast_test(HeapRegion* r) {
-    _in_cset_fast_test.set_by_index(r->hrs_index(), true);
+    _in_cset_fast_test.set_in_cset(r->hrs_index());
   }
 
   // This is a fast test on whether a reference points into the
@@ -827,17 +854,13 @@
   // param is for use with parallel roots processing, and should be
   // the "i" of the calling parallel worker thread's work(i) function.
   // In the sequential case this param will be ignored.
-  void g1_process_strong_roots(bool is_scavenging,
-                               ScanningOption so,
-                               OopClosure* scan_non_heap_roots,
-                               OopsInHeapRegionClosure* scan_rs,
-                               G1KlassScanClosure* scan_klasses,
-                               uint worker_i);
-
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table, and referents of reachable weak refs.
-  void g1_process_weak_roots(OopClosure* root_closure);
+  void g1_process_roots(OopClosure* scan_non_heap_roots,
+                        OopClosure* scan_non_heap_weak_roots,
+                        OopsInHeapRegionClosure* scan_rs,
+                        CLDClosure* scan_strong_clds,
+                        CLDClosure* scan_weak_clds,
+                        CodeBlobClosure* scan_strong_code,
+                        uint worker_i);
 
   // Notifies all the necessary spaces that the committed space has
   // been updated (either expanded or shrunk). It should be called
@@ -1030,7 +1053,7 @@
   // of G1CollectedHeap::_gc_time_stamp.
   unsigned int* _worker_cset_start_region_time_stamp;
 
-  enum G1H_process_strong_roots_tasks {
+  enum G1H_process_roots_tasks {
     G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
@@ -1167,19 +1190,19 @@
   }
 
   // The total number of regions in the heap.
-  uint n_regions() { return _hrs.length(); }
+  uint n_regions() const { return _hrs.length(); }
 
   // The max number of regions in the heap.
-  uint max_regions() { return _hrs.max_length(); }
+  uint max_regions() const { return _hrs.max_length(); }
 
   // The number of regions that are completely free.
-  uint free_regions() { return _free_list.length(); }
+  uint free_regions() const { return _free_list.length(); }
 
   // The number of regions that are not completely free.
-  uint used_regions() { return n_regions() - free_regions(); }
+  uint used_regions() const { return n_regions() - free_regions(); }
 
   // The number of regions available for "regular" expansion.
-  uint expansion_regions() { return _expansion_regions; }
+  uint expansion_regions() const { return _expansion_regions; }
 
   // Factory method for HeapRegion instances. It will return NULL if
   // the allocation fails.
@@ -1190,6 +1213,30 @@
   void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
   void verify_dirty_young_regions() PRODUCT_RETURN;
 
+#ifndef PRODUCT
+  // Make sure that the given bitmap has no marked objects in the
+  // range [from,limit). If it does, print an error message and return
+  // false. Otherwise, just return true. bitmap_name should be "prev"
+  // or "next".
+  bool verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+                                HeapWord* from, HeapWord* limit);
+
+  // Verify that the prev / next bitmap range [tams,end) for the given
+  // region has no marks. Return true if all is well, false if errors
+  // are detected.
+  bool verify_bitmaps(const char* caller, HeapRegion* hr);
+#endif // PRODUCT
+
+  // If G1VerifyBitmaps is set, verify that the marking bitmaps for
+  // the given region do not have any spurious marks. If errors are
+  // detected, print appropriate error messages and crash.
+  void check_bitmaps(const char* caller, HeapRegion* hr) PRODUCT_RETURN;
+
+  // If G1VerifyBitmaps is set, verify that the marking bitmaps do not
+  // have any spurious marks. If errors are detected, print
+  // appropriate error messages and crash.
+  void check_bitmaps(const char* caller) PRODUCT_RETURN;
+
   // verify_region_sets() performs verification over the region
   // lists. It will be compiled in the product code to be used when
   // necessary (i.e., during heap verification).
@@ -1268,9 +1315,61 @@
   virtual bool is_in(const void* p) const;
 
   // Return "TRUE" iff the given object address is within the collection
-  // set.
+  // set. Slow implementation.
   inline bool obj_in_cs(oop obj);
 
+  inline bool is_in_cset(oop obj);
+
+  inline bool is_in_cset_or_humongous(const oop obj);
+
+  enum in_cset_state_t {
+   InNeither,           // neither in collection set nor humongous
+   InCSet,              // region is in collection set only
+   IsHumongous          // region is a humongous start region
+  };
+ private:
+  // Instances of this class are used for quick tests on whether a reference points
+  // into the collection set or is a humongous object (points into a humongous
+  // object).
+  // Each of the array's elements denotes whether the corresponding region is in
+  // the collection set or a humongous region.
+  // We use this to quickly reclaim humongous objects: by making a humongous region
+  // succeed this test, we sort-of add it to the collection set. During the reference
+  // iteration closures, when we see a humongous region, we simply mark it as
+  // referenced, i.e. live.
+  class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> {
+   protected:
+    char default_value() const { return G1CollectedHeap::InNeither; }
+   public:
+    void set_humongous(uintptr_t index) {
+      assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values");
+      set_by_index(index, G1CollectedHeap::IsHumongous);
+    }
+
+    void clear_humongous(uintptr_t index) {
+      set_by_index(index, G1CollectedHeap::InNeither);
+    }
+
+    void set_in_cset(uintptr_t index) {
+      assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value");
+      set_by_index(index, G1CollectedHeap::InCSet);
+    }
+
+    bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; }
+    bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; }
+    G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); }
+    void clear() { G1BiasedMappedArray<char>::clear(); }
+  };
+
+  // This array is used for a quick test on whether a reference points into
+  // the collection set or not. Each of the array's elements denotes whether the
+  // corresponding region is in the collection set or not.
+  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+
+ public:
+
+  inline in_cset_state_t in_cset_state(const oop obj);
+
   // Return "TRUE" iff the given object address is in the reserved
   // region of g1.
   bool is_in_g1_reserved(const void* p) const {
@@ -1305,9 +1404,6 @@
   // "cl.do_oop" on each.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to a memory region.
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // Iterate over all objects, calling "cl.do_object" on each.
   virtual void object_iterate(ObjectClosure* cl);
 
@@ -1325,6 +1421,10 @@
   // Return the region with the given index. It assumes the index is valid.
   inline HeapRegion* region_at(uint index) const;
 
+  // Calculate the region index of the given address. Given address must be
+  // within the heap.
+  inline uint addr_to_region(HeapWord* addr) const;
+
   // Divide the heap region sequence into "chunks" of some size (the number
   // of regions divided by the number of parallel threads times some
   // overpartition factor, currently 4).  Assumes that this will be called
@@ -1377,8 +1477,7 @@
   // As above but starting from region r
   void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk);
 
-  // Returns the first (lowest address) compactible space in the heap.
-  virtual CompactibleSpace* first_compactible_space();
+  HeapRegion* next_compaction_region(const HeapRegion* from) const;
 
   // A CollectedHeap will contain some number of spaces.  This finds the
   // space containing a given address, or else returns NULL.
@@ -1601,10 +1700,6 @@
   // Free up superfluous code root memory.
   void purge_code_root_memory();
 
-  // During an initial mark pause, mark all the code roots that
-  // point into regions *not* in the collection set.
-  void mark_strong_code_roots(uint worker_id);
-
   // Rebuild the stong code root lists for each region
   // after a full GC
   void rebuild_strong_code_roots();
@@ -1613,6 +1708,9 @@
   // in symbol table, possibly in parallel.
   void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true);
 
+  // Parallel phase of unloading/cleaning after G1 concurrent mark.
+  void parallel_cleaning(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, bool class_unloading_occurred);
+
   // Redirty logged cards in the refinement queue.
   void redirty_logged_cards();
   // Verification
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -40,6 +40,13 @@
 // Return the region with the given index. It assumes the index is valid.
 inline HeapRegion* G1CollectedHeap::region_at(uint index) const { return _hrs.at(index); }
 
+inline uint G1CollectedHeap::addr_to_region(HeapWord* addr) const {
+  assert(is_in_reserved(addr),
+         err_msg("Cannot calculate region index for address "PTR_FORMAT" that is outside of the heap ["PTR_FORMAT", "PTR_FORMAT")",
+                 p2i(addr), p2i(_reserved.start()), p2i(_reserved.end())));
+  return (uint)(pointer_delta(addr, _reserved.start(), sizeof(uint8_t)) >> HeapRegion::LogOfHRGrainBytes);
+}
+
 template <class T>
 inline HeapRegion*
 G1CollectedHeap::heap_region_containing(const T addr) const {
@@ -172,12 +179,11 @@
   return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
 }
 
-
 // This is a fast test on whether a reference points into the
 // collection set or not. Assume that the reference
 // points into the heap.
-inline bool G1CollectedHeap::in_cset_fast_test(oop obj) {
-  bool ret = _in_cset_fast_test.get_by_address((HeapWord*)obj);
+inline bool G1CollectedHeap::is_in_cset(oop obj) {
+  bool ret = _in_cset_fast_test.is_in_cset((HeapWord*)obj);
   // let's make sure the result is consistent with what the slower
   // test returns
   assert( ret || !obj_in_cs(obj), "sanity");
@@ -185,6 +191,18 @@
   return ret;
 }
 
+bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) {
+  return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj);
+}
+
+G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) {
+  return _in_cset_fast_test.at((HeapWord*)obj);
+}
+
+void G1CollectedHeap::register_humongous_region_with_in_cset_fast_test(uint index) {
+  _in_cset_fast_test.set_humongous(index);
+}
+
 #ifndef PRODUCT
 // Support for G1EvacuationFailureALot
 
@@ -290,4 +308,22 @@
   else return is_obj_ill(obj, hr);
 }
 
+inline void G1CollectedHeap::set_humongous_is_live(oop obj) {
+  uint region = addr_to_region((HeapWord*)obj);
+  // We not only set the "live" flag in the humongous_is_live table, but also
+  // reset the entry in the _in_cset_fast_test table so that subsequent references
+  // to the same humongous object do not go into the slow path again.
+  // This is racy, as multiple threads may at the same time enter here, but this
+  // is benign.
+  // During collection we only ever set the "live" flag, and only ever clear the
+  // entry in the in_cset_fast_table.
+  // We only ever evaluate the contents of these tables (in the VM thread) after
+  // having synchronized the worker threads with the VM thread, or in the same
+  // thread (i.e. within the VM thread).
+  if (!_humongous_is_live.is_live(region)) {
+    _humongous_is_live.set_live(region);
+    _in_cset_fast_test.clear_humongous(region);
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTEDHEAP_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1046,7 +1046,7 @@
 
   bool new_in_marking_window = _in_marking_window;
   bool new_in_marking_window_im = false;
-  if (during_initial_mark_pause()) {
+  if (last_pause_included_initial_mark) {
     new_in_marking_window = true;
     new_in_marking_window_im = true;
   }
--- a/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -71,6 +71,9 @@
   bool _during_initial_mark;
   bool _during_conc_mark;
   uint _worker_id;
+  HeapWord* _end_of_last_gap;
+  HeapWord* _last_gap_threshold;
+  HeapWord* _last_obj_threshold;
 
 public:
   RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
@@ -83,7 +86,10 @@
     _update_rset_cl(update_rset_cl),
     _during_initial_mark(during_initial_mark),
     _during_conc_mark(during_conc_mark),
-    _worker_id(worker_id) { }
+    _worker_id(worker_id),
+    _end_of_last_gap(hr->bottom()),
+    _last_gap_threshold(hr->bottom()),
+    _last_obj_threshold(hr->bottom()) { }
 
   size_t marked_bytes() { return _marked_bytes; }
 
@@ -107,7 +113,12 @@
     HeapWord* obj_addr = (HeapWord*) obj;
     assert(_hr->is_in(obj_addr), "sanity");
     size_t obj_size = obj->size();
-    _hr->update_bot_for_object(obj_addr, obj_size);
+    HeapWord* obj_end = obj_addr + obj_size;
+
+    if (_end_of_last_gap != obj_addr) {
+      // there was a gap before obj_addr
+      _last_gap_threshold = _hr->cross_threshold(_end_of_last_gap, obj_addr);
+    }
 
     if (obj->is_forwarded() && obj->forwardee() == obj) {
       // The object failed to move.
@@ -115,7 +126,9 @@
       // We consider all objects that we find self-forwarded to be
       // live. What we'll do is that we'll update the prev marking
       // info so that they are all under PTAMS and explicitly marked.
-      _cm->markPrev(obj);
+      if (!_cm->isPrevMarked(obj)) {
+        _cm->markPrev(obj);
+      }
       if (_during_initial_mark) {
         // For the next marking info we'll only mark the
         // self-forwarded objects explicitly if we are during
@@ -145,13 +158,18 @@
       // remembered set entries missing given that we skipped cards on
       // the collection set. So, we'll recreate such entries now.
       obj->oop_iterate(_update_rset_cl);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
     } else {
+
       // The object has been either evacuated or is dead. Fill it with a
       // dummy object.
-      MemRegion mr((HeapWord*) obj, obj_size);
+      MemRegion mr(obj_addr, obj_size);
       CollectedHeap::fill_with_object(mr);
+
+      // must nuke all dead objects which we skipped when iterating over the region
+      _cm->clearRangePrevBitmap(MemRegion(_end_of_last_gap, obj_end));
     }
+    _end_of_last_gap = obj_end;
+    _last_obj_threshold = _hr->cross_threshold(obj_addr, obj_end);
   }
 };
 
@@ -182,15 +200,9 @@
                                             during_conc_mark,
                                             _worker_id);
 
-        MemRegion mr(hr->bottom(), hr->end());
-        // We'll recreate the prev marking info so we'll first clear
-        // the prev bitmap range for this region. We never mark any
-        // CSet objects explicitly so the next bitmap range should be
-        // cleared anyway.
-        _cm->clearRangePrevBitmap(mr);
-
         hr->note_self_forwarding_removal_start(during_initial_mark,
                                                during_conc_mark);
+        _g1h->check_bitmaps("Self-Forwarding Ptr Removal", hr);
 
         // In the common case (i.e. when there is no evacuation
         // failure) we make sure that the following is done when
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -166,7 +166,6 @@
   _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
   _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
   _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_strong_code_root_mark_times_ms(_max_gc_threads, "%.1lf"),
   _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
@@ -193,7 +192,6 @@
   _last_update_rs_processed_buffers.reset();
   _last_scan_rs_times_ms.reset();
   _last_strong_code_root_scan_times_ms.reset();
-  _last_strong_code_root_mark_times_ms.reset();
   _last_obj_copy_times_ms.reset();
   _last_termination_times_ms.reset();
   _last_termination_attempts.reset();
@@ -214,7 +212,6 @@
   _last_update_rs_processed_buffers.verify();
   _last_scan_rs_times_ms.verify();
   _last_strong_code_root_scan_times_ms.verify();
-  _last_strong_code_root_mark_times_ms.verify();
   _last_obj_copy_times_ms.verify();
   _last_termination_times_ms.verify();
   _last_termination_attempts.verify();
@@ -229,7 +226,6 @@
                                _last_update_rs_times_ms.get(i) +
                                _last_scan_rs_times_ms.get(i) +
                                _last_strong_code_root_scan_times_ms.get(i) +
-                               _last_strong_code_root_mark_times_ms.get(i) +
                                _last_obj_copy_times_ms.get(i) +
                                _last_termination_times_ms.get(i);
 
@@ -240,8 +236,10 @@
   _last_gc_worker_times_ms.verify();
   _last_gc_worker_other_times_ms.verify();
 
-  _last_redirty_logged_cards_time_ms.verify();
-  _last_redirty_logged_cards_processed_cards.verify();
+  if (G1DeferredRSUpdate) {
+    _last_redirty_logged_cards_time_ms.verify();
+    _last_redirty_logged_cards_processed_cards.verify();
+  }
 }
 
 void G1GCPhaseTimes::note_string_dedup_fixup_start() {
@@ -258,6 +256,10 @@
   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
 }
 
+void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) {
+  LineBuffer(level).append_and_print_cr("[%s: "SIZE_FORMAT"]", str, value);
+}
+
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers);
 }
@@ -301,9 +303,6 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
     }
-    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
-     _last_strong_code_root_mark_times_ms.print(2, "Code Root Marking (ms)");
-    }
     _last_update_rs_times_ms.print(2, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(3, "Processed Buffers");
     _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
@@ -321,9 +320,6 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
     }
-    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
-      _last_strong_code_root_mark_times_ms.print(1, "Code Root Marking (ms)");
-    }
     _last_update_rs_times_ms.print(1, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(2, "Processed Buffers");
     _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
@@ -366,6 +362,14 @@
       _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
     }
   }
+  if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
+    if (G1Log::finest()) {
+      print_stats(3, "Humongous Total", _cur_fast_reclaim_humongous_total);
+      print_stats(3, "Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
+      print_stats(3, "Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
+    }
+  }
   print_stats(2, "Free CSet",
     (_recorded_young_free_cset_time_ms +
     _recorded_non_young_free_cset_time_ms));
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -120,7 +120,6 @@
   WorkerDataArray<int>    _last_update_rs_processed_buffers;
   WorkerDataArray<double> _last_scan_rs_times_ms;
   WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
-  WorkerDataArray<double> _last_strong_code_root_mark_times_ms;
   WorkerDataArray<double> _last_obj_copy_times_ms;
   WorkerDataArray<double> _last_termination_times_ms;
   WorkerDataArray<size_t> _last_termination_attempts;
@@ -158,11 +157,17 @@
   double _recorded_young_free_cset_time_ms;
   double _recorded_non_young_free_cset_time_ms;
 
+  double _cur_fast_reclaim_humongous_time_ms;
+  size_t _cur_fast_reclaim_humongous_total;
+  size_t _cur_fast_reclaim_humongous_candidates;
+  size_t _cur_fast_reclaim_humongous_reclaimed;
+
   double _cur_verify_before_time_ms;
   double _cur_verify_after_time_ms;
 
   // Helper methods for detailed logging
   void print_stats(int level, const char* str, double value);
+  void print_stats(int level, const char* str, size_t value);
   void print_stats(int level, const char* str, double value, uint workers);
 
  public:
@@ -199,10 +204,6 @@
     _last_strong_code_root_scan_times_ms.set(worker_i, ms);
   }
 
-  void record_strong_code_root_mark_time(uint worker_i, double ms) {
-    _last_strong_code_root_mark_times_ms.set(worker_i, ms);
-  }
-
   void record_obj_copy_time(uint worker_i, double ms) {
     _last_obj_copy_times_ms.set(worker_i, ms);
   }
@@ -287,6 +288,16 @@
     _recorded_non_young_free_cset_time_ms = time_ms;
   }
 
+  void record_fast_reclaim_humongous_stats(size_t total, size_t candidates) {
+    _cur_fast_reclaim_humongous_total = total;
+    _cur_fast_reclaim_humongous_candidates = candidates;
+  }
+
+  void record_fast_reclaim_humongous_time_ms(double value, size_t reclaimed) {
+    _cur_fast_reclaim_humongous_time_ms = value;
+    _cur_fast_reclaim_humongous_reclaimed = reclaimed;
+  }
+
   void record_young_cset_choice_time_ms(double time_ms) {
     _recorded_young_cset_choice_time_ms = time_ms;
   }
@@ -353,6 +364,10 @@
     return _recorded_non_young_free_cset_time_ms;
   }
 
+  double fast_reclaim_humongous_time_ms() {
+    return _cur_fast_reclaim_humongous_time_ms;
+  }
+
   double average_last_update_rs_time() {
     return _last_update_rs_times_ms.average();
   }
@@ -369,10 +384,6 @@
     return _last_strong_code_root_scan_times_ms.average();
   }
 
-  double average_last_strong_code_root_mark_time(){
-    return _last_strong_code_root_mark_times_ms.average();
-  }
-
   double average_last_obj_copy_time() {
     return _last_obj_copy_times_ms.average();
   }
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -128,15 +128,15 @@
 
   SharedHeap* sh = SharedHeap::heap();
 
-  // Need cleared claim bits for the strong roots processing
+  // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  sh->process_strong_roots(true,  // activate StrongRootsScope
-                           false, // not scavenging.
-                           SharedHeap::SO_SystemClasses,
+  MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
+  sh->process_strong_roots(true,   // activate StrongRootsScope
+                           SharedHeap::SO_None,
                            &GenMarkSweep::follow_root_closure,
-                           &GenMarkSweep::follow_code_root_closure,
-                           &GenMarkSweep::follow_klass_closure);
+                           &GenMarkSweep::follow_cld_closure,
+                           &follow_code_closure);
 
   // Process reference objects found during marking
   ReferenceProcessor* rp = GenMarkSweep::ref_processor();
@@ -200,6 +200,23 @@
   CompactPoint _cp;
   HeapRegionSetCount _humongous_regions_removed;
 
+  bool is_cp_initialized() const {
+    return _cp.space != NULL;
+  }
+
+  void prepare_for_compaction(HeapRegion* hr, HeapWord* end) {
+    // If this is the first live region that we came across which we can compact,
+    // initialize the CompactPoint.
+    if (!is_cp_initialized()) {
+      _cp.space = hr;
+      _cp.threshold = hr->initialize_threshold();
+    }
+    hr->prepare_for_compaction(&_cp);
+    // Also clear the part of the card table that will be unused after
+    // compaction.
+    _mrbs->clear(MemRegion(hr->compaction_top(), end));
+  }
+
   void free_humongous_region(HeapRegion* hr) {
     HeapWord* end = hr->end();
     FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep");
@@ -211,18 +228,15 @@
     _humongous_regions_removed.increment(1u, hr->capacity());
 
     _g1h->free_humongous_region(hr, &dummy_free_list, false /* par */);
-    hr->prepare_for_compaction(&_cp);
-    // Also clear the part of the card table that will be unused after
-    // compaction.
-    _mrbs->clear(MemRegion(hr->compaction_top(), end));
+    prepare_for_compaction(hr, end);
     dummy_free_list.remove_all();
   }
 
 public:
-  G1PrepareCompactClosure(CompactibleSpace* cs)
+  G1PrepareCompactClosure()
   : _g1h(G1CollectedHeap::heap()),
     _mrbs(_g1h->g1_barrier_set()),
-    _cp(NULL, cs, cs->initialize_threshold()),
+    _cp(NULL),
     _humongous_regions_removed() { }
 
   void update_sets() {
@@ -245,10 +259,7 @@
         assert(hr->continuesHumongous(), "Invalid humongous.");
       }
     } else {
-      hr->prepare_for_compaction(&_cp);
-      // Also clear the part of the card table that will be unused after
-      // compaction.
-      _mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
+      prepare_for_compaction(hr, hr->end());
     }
     return false;
   }
@@ -266,14 +277,7 @@
   GCTraceTime tm("phase 2", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace("2");
 
-  // find the first region
-  HeapRegion* r = g1h->region_at(0);
-  CompactibleSpace* sp = r;
-  if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
-    sp = r->next_compaction_space();
-  }
-
-  G1PrepareCompactClosure blk(sp);
+  G1PrepareCompactClosure blk;
   g1h->heap_region_iterate(&blk);
   blk.update_sets();
 }
@@ -305,22 +309,22 @@
 
   SharedHeap* sh = SharedHeap::heap();
 
-  // Need cleared claim bits for the strong roots processing
+  // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  sh->process_strong_roots(true,  // activate StrongRootsScope
-                           false, // not scavenging.
-                           SharedHeap::SO_AllClasses,
-                           &GenMarkSweep::adjust_pointer_closure,
-                           NULL,  // do not touch code cache here
-                           &GenMarkSweep::adjust_klass_closure);
+  CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
+  sh->process_all_roots(true,  // activate StrongRootsScope
+                        SharedHeap::SO_AllCodeCache,
+                        &GenMarkSweep::adjust_pointer_closure,
+                        &GenMarkSweep::adjust_cld_closure,
+                        &adjust_code_closure);
 
   assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity");
   g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure);
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
-  g1h->g1_process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
+  sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
 
   if (G1StringDedup::is_enabled()) {
     G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
 
+#include "memory/iterator.hpp"
+
 class HeapRegion;
 class G1CollectedHeap;
 class G1RemSet;
@@ -106,7 +108,7 @@
   template <class T> void do_klass_barrier(T* p, oop new_obj);
 };
 
-template <G1Barrier barrier, bool do_mark_object>
+template <G1Barrier barrier, G1Mark do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
 private:
   template <class T> void do_oop_work(T* p);
@@ -121,19 +123,19 @@
   template <class T> void do_oop_nv(T* p) { do_oop_work(p); }
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+
+  G1CollectedHeap*      g1()  { return _g1; };
+  G1ParScanThreadState* pss() { return _par_scan_state; }
+  ReferenceProcessor*   rp()  { return _ref_processor; };
 };
 
-typedef G1ParCopyClosure<G1BarrierNone, false> G1ParScanExtRootClosure;
-typedef G1ParCopyClosure<G1BarrierKlass, false> G1ParScanMetadataClosure;
-
-
-typedef G1ParCopyClosure<G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
-typedef G1ParCopyClosure<G1BarrierKlass, true> G1ParScanAndMarkMetadataClosure;
-
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkNone>             G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkFromRoot>         G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkPromotedFromRoot> G1ParScanAndMarkWeakExtRootClosure;
 // We use a separate closure to handle references during evacuation
 // failure processing.
 
-typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacFailureClosure;
+typedef G1ParCopyClosure<G1BarrierEvac, G1MarkNone> G1ParScanHeapEvacFailureClosure;
 
 class FilterIntoCSClosure: public ExtendedOopClosure {
   G1CollectedHeap* _g1;
@@ -164,10 +166,11 @@
 };
 
 // Closure for iterating over object fields during concurrent marking
-class G1CMOopClosure : public ExtendedOopClosure {
+class G1CMOopClosure : public MetadataAwareOopClosure {
+protected:
+  ConcurrentMark*    _cm;
 private:
   G1CollectedHeap*   _g1h;
-  ConcurrentMark*    _cm;
   CMTask*            _task;
 public:
   G1CMOopClosure(G1CollectedHeap* g1h, ConcurrentMark* cm, CMTask* task);
@@ -177,7 +180,7 @@
 };
 
 // Closure to scan the root regions during concurrent marking
-class G1RootRegionScanClosure : public ExtendedOopClosure {
+class G1RootRegionScanClosure : public MetadataAwareOopClosure {
 private:
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -32,6 +32,7 @@
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "memory/iterator.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
 /*
@@ -43,7 +44,7 @@
 inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
-      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
+      _g1->is_in_cset_or_humongous(oopDesc::decode_heap_oop_not_null(heap_oop))) {
     _oc->do_oop(p);
   }
 }
@@ -66,7 +67,8 @@
 
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+    if (state == G1CollectedHeap::InCSet) {
       // We're not going to even bother checking whether the object is
       // already forwarded or not, as this usually causes an immediate
       // stall. We'll try to prefetch the object (for write, given that
@@ -85,6 +87,9 @@
 
       _par_scan_state->push_on_queue(p);
     } else {
+      if (state == G1CollectedHeap::IsHumongous) {
+        _g1->set_humongous_is_live(obj);
+      }
       _par_scan_state->update_rs(_from, p, _worker_id);
     }
   }
@@ -96,22 +101,20 @@
 
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    if (_g1->is_in_cset_or_humongous(obj)) {
       Prefetch::write(obj->mark_addr(), 0);
       Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
 
       // Place on the references queue
       _par_scan_state->push_on_queue(p);
+    } else {
+      assert(!_g1->obj_in_cs(obj), "checking");
     }
   }
 }
 
 template <class T>
 inline void G1CMOopClosure::do_oop_nv(T* p) {
-  assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
-  assert(!_g1h->is_on_master_free_list(
-                    _g1h->heap_region_containing((HeapWord*) p)), "invariant");
-
   oop obj = oopDesc::load_decode_heap_oop(p);
   if (_cm->verbose_high()) {
     gclog_or_tty->print_cr("[%u] we're looking at location "
--- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -288,7 +288,12 @@
 }
 
 HeapWord* G1ParScanThreadState::allocate(GCAllocPurpose purpose, size_t word_sz) {
-  HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
+  HeapWord* obj = NULL;
+  if (purpose == GCAllocForSurvived) {
+    obj = alloc_buffer(GCAllocForSurvived)->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
+  } else {
+    obj = alloc_buffer(GCAllocForTenured)->allocate(word_sz);
+  }
   if (obj != NULL) {
     return obj;
   }
--- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -52,15 +52,20 @@
   // set, due to (benign) races in the claim mechanism during RSet scanning more
   // than one thread might claim the same card. So the same card may be
   // processed multiple times. So redo this check.
-  if (_g1h->in_cset_fast_test(obj)) {
+  G1CollectedHeap::in_cset_state_t in_cset_state = _g1h->in_cset_state(obj);
+  if (in_cset_state == G1CollectedHeap::InCSet) {
     oop forwardee;
     if (obj->is_forwarded()) {
       forwardee = obj->forwardee();
     } else {
       forwardee = copy_to_survivor_space(obj);
     }
-    assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
+  } else if (in_cset_state == G1CollectedHeap::IsHumongous) {
+    _g1h->set_humongous_is_live(obj);
+  } else {
+    assert(in_cset_state == G1CollectedHeap::InNeither,
+           err_msg("In_cset_state must be InNeither here, but is %d", in_cset_state));
   }
 
   assert(obj != NULL, "Must be");
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -23,7 +23,6 @@
  */
 
 #include "precompiled.hpp"
-#include "gc_implementation/g1/bufferingOopClosure.hpp"
 #include "gc_implementation/g1/concurrentG1Refine.hpp"
 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -65,6 +65,17 @@
   }
 }
 
+void G1SATBCardTableModRefBS::write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
+  if (!dest_uninitialized) {
+    write_ref_array_pre_work(dst, count);
+  }
+}
+void G1SATBCardTableModRefBS::write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
+  if (!dest_uninitialized) {
+    write_ref_array_pre_work(dst, count);
+  }
+}
+
 bool G1SATBCardTableModRefBS::mark_card_deferred(size_t card_index) {
   jbyte val = _byte_map[card_index];
   // It's already processed
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -86,16 +86,8 @@
   }
 
   template <class T> void write_ref_array_pre_work(T* dst, int count);
-  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
-    if (!dest_uninitialized) {
-      write_ref_array_pre_work(dst, count);
-    }
-  }
-  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
-    if (!dest_uninitialized) {
-      write_ref_array_pre_work(dst, count);
-    }
-  }
+  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized);
+  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized);
 
 /*
    Claimed and deferred bits are used together in G1 during the evacuation
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -289,6 +289,13 @@
           "The amount of code root chunks that should be kept at most "     \
           "as percentage of already allocated.")                            \
                                                                             \
+  experimental(bool, G1ReclaimDeadHumongousObjectsAtYoungGC, true,          \
+          "Try to reclaim dead large objects at every young GC.")           \
+                                                                            \
+  experimental(bool, G1TraceReclaimDeadHumongousObjectsAtYoungGC, false,    \
+          "Print some information about large object liveness "             \
+          "at every young GC.")                                             \
+                                                                            \
   experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
           "An upper bound for the number of old CSet regions expressed "    \
           "as a percentage of the heap size.")                              \
@@ -325,11 +332,14 @@
           "evacuation pauses")                                              \
                                                                             \
   diagnostic(bool, G1VerifyRSetsDuringFullGC, false,                        \
-             "If true, perform verification of each heap region's "         \
-             "remembered set when verifying the heap during a full GC.")    \
+          "If true, perform verification of each heap region's "            \
+          "remembered set when verifying the heap during a full GC.")       \
                                                                             \
   diagnostic(bool, G1VerifyHeapRegionCodeRoots, false,                      \
-             "Verify the code root lists attached to each heap region.")
+          "Verify the code root lists attached to each heap region.")       \
+                                                                            \
+  develop(bool, G1VerifyBitmaps, false,                                     \
+          "Verifies the consistency of the marking bitmaps")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -30,14 +30,21 @@
 // non-virtually, using a mechanism defined in this file.  Extend these
 // macros in the obvious way to add specializations for new closures.
 
-// Forward declarations.
 enum G1Barrier {
   G1BarrierNone,
   G1BarrierEvac,
   G1BarrierKlass
 };
 
-template<G1Barrier barrier, bool do_mark_object>
+enum G1Mark {
+  G1MarkNone,
+  G1MarkFromRoot,
+  G1MarkPromotedFromRoot
+};
+
+// Forward declarations.
+
+template<G1Barrier barrier, G1Mark do_mark_object>
 class G1ParCopyClosure;
 
 class G1ParScanClosure;
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -30,6 +30,7 @@
 #include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
+#include "gc_implementation/shared/liveRange.hpp"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/iterator.hpp"
 #include "memory/space.inline.hpp"
@@ -48,7 +49,7 @@
                                  HeapRegion* hr, ExtendedOopClosure* cl,
                                  CardTableModRefBS::PrecisionStyle precision,
                                  FilterKind fk) :
-  ContiguousSpaceDCTOC(hr, cl, precision, NULL),
+  DirtyCardToOopClosure(hr, cl, precision, NULL),
   _hr(hr), _fk(fk), _g1(g1) { }
 
 FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
@@ -60,7 +61,7 @@
                                HeapRegion* hr,
                                HeapWord* cur, HeapWord* top) {
   oop cur_oop = oop(cur);
-  int oop_size = cur_oop->size();
+  size_t oop_size = hr->block_size(cur);
   HeapWord* next_obj = cur + oop_size;
   while (next_obj < top) {
     // Keep filtering the remembered set.
@@ -71,25 +72,24 @@
     }
     cur = next_obj;
     cur_oop = oop(cur);
-    oop_size = cur_oop->size();
+    oop_size = hr->block_size(cur);
     next_obj = cur + oop_size;
   }
   return cur;
 }
 
-void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr,
-                                              HeapWord* bottom,
-                                              HeapWord* top,
-                                              ExtendedOopClosure* cl) {
+void HeapRegionDCTOC::walk_mem_region(MemRegion mr,
+                                      HeapWord* bottom,
+                                      HeapWord* top) {
   G1CollectedHeap* g1h = _g1;
-  int oop_size;
+  size_t oop_size;
   ExtendedOopClosure* cl2 = NULL;
 
-  FilterIntoCSClosure intoCSFilt(this, g1h, cl);
-  FilterOutOfRegionClosure outOfRegionFilt(_hr, cl);
+  FilterIntoCSClosure intoCSFilt(this, g1h, _cl);
+  FilterOutOfRegionClosure outOfRegionFilt(_hr, _cl);
 
   switch (_fk) {
-  case NoFilterKind:          cl2 = cl; break;
+  case NoFilterKind:          cl2 = _cl; break;
   case IntoCSFilterKind:      cl2 = &intoCSFilt; break;
   case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
   default:                    ShouldNotReachHere();
@@ -102,7 +102,7 @@
   if (!g1h->is_obj_dead(oop(bottom), _hr)) {
     oop_size = oop(bottom)->oop_iterate(cl2, mr);
   } else {
-    oop_size = oop(bottom)->size();
+    oop_size = _hr->block_size(bottom);
   }
 
   bottom += oop_size;
@@ -111,17 +111,17 @@
     // We replicate the loop below for several kinds of possible filters.
     switch (_fk) {
     case NoFilterKind:
-      bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top);
+      bottom = walk_mem_region_loop(_cl, g1h, _hr, bottom, top);
       break;
 
     case IntoCSFilterKind: {
-      FilterIntoCSClosure filt(this, g1h, cl);
+      FilterIntoCSClosure filt(this, g1h, _cl);
       bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
       break;
     }
 
     case OutOfRegionFilterKind: {
-      FilterOutOfRegionClosure filt(_hr, cl);
+      FilterOutOfRegionClosure filt(_hr, _cl);
       bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
       break;
     }
@@ -374,50 +374,13 @@
   // region.
   hr_clear(false /*par*/, false /*clear_space*/);
   set_top(bottom());
-  set_saved_mark();
+  record_top_and_timestamp();
 
   assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
 }
 
 CompactibleSpace* HeapRegion::next_compaction_space() const {
-  // We're not using an iterator given that it will wrap around when
-  // it reaches the last region and this is not what we want here.
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  uint index = hrs_index() + 1;
-  while (index < g1h->n_regions()) {
-    HeapRegion* hr = g1h->region_at(index);
-    if (!hr->isHumongous()) {
-      return hr;
-    }
-    index += 1;
-  }
-  return NULL;
-}
-
-void HeapRegion::save_marks() {
-  set_saved_mark();
-}
-
-void HeapRegion::oops_in_mr_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  HeapWord* p = mr.start();
-  HeapWord* e = mr.end();
-  oop obj;
-  while (p < e) {
-    obj = oop(p);
-    p += obj->oop_iterate(cl);
-  }
-  assert(p == e, "bad memregion: doesn't end on obj boundary");
-}
-
-#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \
-void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \
-  ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl);              \
-}
-SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN)
-
-
-void HeapRegion::oop_before_save_marks_iterate(ExtendedOopClosure* cl) {
-  oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
+  return G1CollectedHeap::heap()->next_compaction_region(this);
 }
 
 void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
@@ -425,7 +388,6 @@
   // We always recreate the prev marking info and we'll explicitly
   // mark all objects we find to be self-forwarded on the prev
   // bitmap. So all objects need to be below PTAMS.
-  _prev_top_at_mark_start = top();
   _prev_marked_bytes = 0;
 
   if (during_initial_mark) {
@@ -449,6 +411,7 @@
   assert(0 <= marked_bytes && marked_bytes <= used(),
          err_msg("marked: "SIZE_FORMAT" used: "SIZE_FORMAT,
                  marked_bytes, used()));
+  _prev_top_at_mark_start = top();
   _prev_marked_bytes = marked_bytes;
 }
 
@@ -479,7 +442,7 @@
     if (cl->abort()) return cur;
     // The check above must occur before the operation below, since an
     // abort might invalidate the "size" operation.
-    cur += obj->size();
+    cur += block_size(cur);
   }
   return NULL;
 }
@@ -551,7 +514,7 @@
       return cur;
     }
     // Otherwise...
-    next = (cur + obj->size());
+    next = cur + block_size(cur);
   }
 
   // If we finish the above loop...We have a parseable object that
@@ -559,10 +522,9 @@
   // inside or spans the entire region.
 
   assert(obj == oop(cur), "sanity");
-  assert(cur <= start &&
-         obj->klass_or_null() != NULL &&
-         (cur + obj->size()) > start,
-         "Loop postcondition");
+  assert(cur <= start, "Loop postcondition");
+  assert(obj->klass_or_null() != NULL, "Loop postcondition");
+  assert((cur + block_size(cur)) > start, "Loop postcondition");
 
   if (!g1h->is_obj_dead(obj)) {
     obj->oop_iterate(cl, mr);
@@ -576,7 +538,7 @@
     };
 
     // Otherwise:
-    next = (cur + obj->size());
+    next = cur + block_size(cur);
 
     if (!g1h->is_obj_dead(obj)) {
       if (next < end || !obj->is_objArray()) {
@@ -931,10 +893,11 @@
   size_t object_num = 0;
   while (p < top()) {
     oop obj = oop(p);
-    size_t obj_size = obj->size();
+    size_t obj_size = block_size(p);
     object_num += 1;
 
-    if (is_humongous != g1->isHumongous(obj_size)) {
+    if (is_humongous != g1->isHumongous(obj_size) &&
+        !g1->is_obj_dead(obj, this)) { // Dead objects may have bigger block_size since they span several objects.
       gclog_or_tty->print_cr("obj "PTR_FORMAT" is of %shumongous size ("
                              SIZE_FORMAT" words) in a %shumongous region",
                              p, g1->isHumongous(obj_size) ? "" : "non-",
@@ -945,7 +908,9 @@
 
     // If it returns false, verify_for_object() will output the
     // appropriate messasge.
-    if (do_bot_verify && !_offsets.verify_for_object(p, obj_size)) {
+    if (do_bot_verify &&
+        !g1->is_obj_dead(obj, this) &&
+        !_offsets.verify_for_object(p, obj_size)) {
       *failures = true;
       return;
     }
@@ -953,7 +918,10 @@
     if (!g1->is_obj_dead_cond(obj, this, vo)) {
       if (obj->is_oop()) {
         Klass* klass = obj->klass();
-        if (!klass->is_metaspace_object()) {
+        bool is_metaspace_object = Metaspace::contains(klass) ||
+                                   (vo == VerifyOption_G1UsePrevMarking &&
+                                   ClassLoaderDataGraph::unload_list_contains(klass));
+        if (!is_metaspace_object) {
           gclog_or_tty->print_cr("klass "PTR_FORMAT" of object "PTR_FORMAT" "
                                  "not metadata", klass, (void *)obj);
           *failures = true;
@@ -1067,7 +1035,9 @@
 // away eventually.
 
 void G1OffsetTableContigSpace::clear(bool mangle_space) {
-  ContiguousSpace::clear(mangle_space);
+  set_top(bottom());
+  set_saved_mark_word(bottom());
+  CompactibleSpace::clear(mangle_space);
   _offsets.zero_bottom_entry();
   _offsets.initialize_threshold();
 }
@@ -1105,10 +1075,10 @@
   if (_gc_time_stamp < g1h->get_gc_time_stamp())
     return top();
   else
-    return ContiguousSpace::saved_mark_word();
+    return Space::saved_mark_word();
 }
 
-void G1OffsetTableContigSpace::set_saved_mark() {
+void G1OffsetTableContigSpace::record_top_and_timestamp() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();
 
@@ -1120,7 +1090,7 @@
     // of region. If it does so after _gc_time_stamp = ..., then it
     // will pick up the right saved_mark_word() as the high water mark
     // of the region. Either way, the behaviour will be correct.
-    ContiguousSpace::set_saved_mark();
+    Space::set_saved_mark_word(top());
     OrderAccess::storestore();
     _gc_time_stamp = curr_gc_time_stamp;
     // No need to do another barrier to flush the writes above. If
@@ -1131,6 +1101,26 @@
   }
 }
 
+void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) {
+  object_iterate(blk);
+}
+
+void G1OffsetTableContigSpace::object_iterate(ObjectClosure* blk) {
+  HeapWord* p = bottom();
+  while (p < top()) {
+    if (block_is_obj(p)) {
+      blk->do_object(oop(p));
+    }
+    p += block_size(p);
+  }
+}
+
+#define block_is_always_obj(q) true
+void G1OffsetTableContigSpace::prepare_for_compaction(CompactPoint* cp) {
+  SCAN_AND_FORWARD(cp, top, block_is_always_obj, block_size);
+}
+#undef block_is_always_obj
+
 G1OffsetTableContigSpace::
 G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
                          MemRegion mr) :
@@ -1140,7 +1130,8 @@
 {
   _offsets.set_space(this);
   // false ==> we'll do the clearing if there's clearing to be done.
-  ContiguousSpace::initialize(mr, false, SpaceDecorator::Mangle);
+  CompactibleSpace::initialize(mr, false, SpaceDecorator::Mangle);
+  _top = bottom();
   _offsets.zero_bottom_entry();
   _offsets.initialize_threshold();
 }
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_HPP
 
-#include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/g1BlockOffsetTable.hpp"
 #include "gc_implementation/g1/g1_specialized_oop_closures.hpp"
 #include "gc_implementation/g1/survRateGroup.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
@@ -46,8 +46,6 @@
 // The solution is to remove this method from the definition
 // of a Space.
 
-class CompactibleSpace;
-class ContiguousSpace;
 class HeapRegionRemSet;
 class HeapRegionRemSetIterator;
 class HeapRegion;
@@ -71,7 +69,7 @@
 // in the concurrent marker used by G1 to filter remembered
 // sets.
 
-class HeapRegionDCTOC : public ContiguousSpaceDCTOC {
+class HeapRegionDCTOC : public DirtyCardToOopClosure {
 public:
   // Specification of possible DirtyCardToOopClosure filtering.
   enum FilterKind {
@@ -85,39 +83,13 @@
   FilterKind _fk;
   G1CollectedHeap* _g1;
 
-  void walk_mem_region_with_cl(MemRegion mr,
-                               HeapWord* bottom, HeapWord* top,
-                               ExtendedOopClosure* cl);
-
-  // We don't specialize this for FilteringClosure; filtering is handled by
-  // the "FilterKind" mechanism.  But we provide this to avoid a compiler
-  // warning.
-  void walk_mem_region_with_cl(MemRegion mr,
-                               HeapWord* bottom, HeapWord* top,
-                               FilteringClosure* cl) {
-    HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top,
-                                             (ExtendedOopClosure*)cl);
-  }
-
-  // Get the actual top of the area on which the closure will
-  // operate, given where the top is assumed to be (the end of the
-  // memory region passed to do_MemRegion) and where the object
-  // at the top is assumed to start. For example, an object may
-  // start at the top but actually extend past the assumed top,
-  // in which case the top becomes the end of the object.
-  HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) {
-    return ContiguousSpaceDCTOC::get_actual_top(top, top_obj);
-  }
-
   // Walk the given memory region from bottom to (actual) top
   // looking for objects and applying the oop closure (_cl) to
   // them. The base implementation of this treats the area as
   // blocks, where a block may or may not be an object. Sub-
   // classes should override this to provide more accurate
   // or possibly more efficient walking.
-  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) {
-    Filtering_DCTOC::walk_mem_region(mr, bottom, top);
-  }
+  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top);
 
 public:
   HeapRegionDCTOC(G1CollectedHeap* g1,
@@ -151,9 +123,9 @@
 // the regions anyway) and at the end of a Full GC. The current scheme
 // that uses sequential unsigned ints will fail only if we have 4b
 // evacuation pauses between two cleanups, which is _highly_ unlikely.
-
-class G1OffsetTableContigSpace: public ContiguousSpace {
+class G1OffsetTableContigSpace: public CompactibleSpace {
   friend class VMStructs;
+  HeapWord* _top;
  protected:
   G1BlockOffsetArrayContigSpace _offsets;
   Mutex _par_alloc_lock;
@@ -170,11 +142,32 @@
   G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
                            MemRegion mr);
 
+  void set_top(HeapWord* value) { _top = value; }
+  HeapWord* top() const { return _top; }
+
+ protected:
+  HeapWord** top_addr() { return &_top; }
+  // Allocation helpers (return NULL if full).
+  inline HeapWord* allocate_impl(size_t word_size, HeapWord* end_value);
+  inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value);
+
+ public:
+  void reset_after_compaction() { set_top(compaction_top()); }
+
+  size_t used() const { return byte_size(bottom(), top()); }
+  size_t free() const { return byte_size(top(), end()); }
+  bool is_free_block(const HeapWord* p) const { return p >= top(); }
+
+  MemRegion used_region() const { return MemRegion(bottom(), top()); }
+
+  void object_iterate(ObjectClosure* blk);
+  void safe_object_iterate(ObjectClosure* blk);
+
   void set_bottom(HeapWord* value);
   void set_end(HeapWord* value);
 
   virtual HeapWord* saved_mark_word() const;
-  virtual void set_saved_mark();
+  void record_top_and_timestamp();
   void reset_gc_time_stamp() { _gc_time_stamp = 0; }
   unsigned get_gc_time_stamp() { return _gc_time_stamp; }
 
@@ -194,6 +187,8 @@
   HeapWord* block_start(const void* p);
   HeapWord* block_start_const(const void* p) const;
 
+  void prepare_for_compaction(CompactPoint* cp);
+
   // Add offset table update.
   virtual HeapWord* allocate(size_t word_size);
   HeapWord* par_allocate(size_t word_size);
@@ -228,10 +223,6 @@
     ContinuesHumongous
   };
 
-  // Requires that the region "mr" be dense with objects, and begin and end
-  // with an object.
-  void oops_in_mr_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // The remembered set for this region.
   // (Might want to make this "inline" later, to avoid some alloc failure
   // issues.)
@@ -256,11 +247,9 @@
   bool _evacuation_failed;
 
   // A heap region may be a member one of a number of special subsets, each
-  // represented as linked lists through the field below.  Currently, these
-  // sets include:
+  // represented as linked lists through the field below.  Currently, there
+  // is only one set:
   //   The collection set.
-  //   The set of allocation regions used in a collection pause.
-  //   Spaces that may contain gray objects.
   HeapRegion* _next_in_special_set;
 
   // next region in the young "generation" region set
@@ -379,14 +368,15 @@
     ParMarkRootClaimValue      = 9
   };
 
-  inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
-    assert(is_young(), "we can only skip BOT updates on young regions");
-    return ContiguousSpace::par_allocate(word_size);
-  }
-  inline HeapWord* allocate_no_bot_updates(size_t word_size) {
-    assert(is_young(), "we can only skip BOT updates on young regions");
-    return ContiguousSpace::allocate(word_size);
-  }
+  // All allocated blocks are occupied by objects in a HeapRegion
+  bool block_is_obj(const HeapWord* p) const;
+
+  // Returns the object size for all valid block starts
+  // and the amount of unallocated words if called on top()
+  size_t block_size(const HeapWord* p) const;
+
+  inline HeapWord* par_allocate_no_bot_updates(size_t word_size);
+  inline HeapWord* allocate_no_bot_updates(size_t word_size);
 
   // If this region is a member of a HeapRegionSeq, the index in that
   // sequence, otherwise -1.
@@ -595,9 +585,6 @@
 
   HeapWord* orig_end() { return _orig_end; }
 
-  // Allows logical separation between objects allocated before and after.
-  void save_marks();
-
   // Reset HR stuff to default values.
   void hr_clear(bool par, bool clear_space, bool locked = false);
   void par_clear();
@@ -606,10 +593,6 @@
   HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; }
   HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; }
 
-  // Apply "cl->do_oop" to (the addresses of) all reference fields in objects
-  // allocated in the current region before the last call to "save_mark".
-  void oop_before_save_marks_iterate(ExtendedOopClosure* cl);
-
   // Note the start or end of marking. This tells the heap region
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
@@ -795,10 +778,6 @@
     _predicted_bytes_to_copy = bytes;
   }
 
-#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix)  \
-  virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl);
-  SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL)
-
   virtual CompactibleSpace* next_compaction_space() const;
 
   virtual void reset_after_compaction();
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,8 +25,49 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
 
+#include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "memory/space.hpp"
+#include "runtime/atomic.inline.hpp"
+
+// This version requires locking.
+inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t size,
+                                                HeapWord* const end_value) {
+  HeapWord* obj = top();
+  if (pointer_delta(end_value, obj) >= size) {
+    HeapWord* new_top = obj + size;
+    set_top(new_top);
+    assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+    return obj;
+  } else {
+    return NULL;
+  }
+}
+
+// This version is lock-free.
+inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t size,
+                                                    HeapWord* const end_value) {
+  do {
+    HeapWord* obj = top();
+    if (pointer_delta(end_value, obj) >= size) {
+      HeapWord* new_top = obj + size;
+      HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj);
+      // result can be one of two:
+      //  the old top value: the exchange succeeded
+      //  otherwise: the new value of the top is returned.
+      if (result == obj) {
+        assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+        return obj;
+      }
+    } else {
+      return NULL;
+    }
+  } while (true);
+}
+
 inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
-  HeapWord* res = ContiguousSpace::allocate(size);
+  HeapWord* res = allocate_impl(size, end());
   if (res != NULL) {
     _offsets.alloc_block(res, size);
   }
@@ -38,12 +79,7 @@
 // this is used for larger LAB allocations only.
 inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
   MutexLocker x(&_par_alloc_lock);
-  // Given that we take the lock no need to use par_allocate() here.
-  HeapWord* res = ContiguousSpace::allocate(size);
-  if (res != NULL) {
-    _offsets.alloc_block(res, size);
-  }
-  return res;
+  return allocate(size);
 }
 
 inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
@@ -55,6 +91,52 @@
   return _offsets.block_start_const(p);
 }
 
+inline bool
+HeapRegion::block_is_obj(const HeapWord* p) const {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  if (ClassUnloadingWithConcurrentMark) {
+    return !g1h->is_obj_dead(oop(p), this);
+  }
+  return p < top();
+}
+
+inline size_t
+HeapRegion::block_size(const HeapWord *addr) const {
+  if (addr == top()) {
+    return pointer_delta(end(), addr);
+  }
+
+  if (block_is_obj(addr)) {
+    return oop(addr)->size();
+  }
+
+  assert(ClassUnloadingWithConcurrentMark,
+      err_msg("All blocks should be objects if G1 Class Unloading isn't used. "
+              "HR: ["PTR_FORMAT", "PTR_FORMAT", "PTR_FORMAT") "
+              "addr: " PTR_FORMAT,
+              p2i(bottom()), p2i(top()), p2i(end()), p2i(addr)));
+
+  // Old regions' dead objects may have dead classes
+  // We need to find the next live object in some other
+  // manner than getting the oop size
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  HeapWord* next = g1h->concurrent_mark()->prevMarkBitMap()->
+      getNextMarkedWordAddress(addr, prev_top_at_mark_start());
+
+  assert(next > addr, "must get the next live object");
+  return pointer_delta(next, addr);
+}
+
+inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) {
+  assert(is_young(), "we can only skip BOT updates on young regions");
+  return par_allocate_impl(word_size, end());
+}
+
+inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t word_size) {
+  assert(is_young(), "we can only skip BOT updates on young regions");
+  return allocate_impl(word_size, end());
+}
+
 inline void HeapRegion::note_start_of_marking() {
   _next_marked_bytes = 0;
   _next_top_at_mark_start = top();
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -694,6 +694,9 @@
   clear_fcc();
 }
 
+bool OtherRegionsTable::is_empty() const {
+  return occ_sparse() == 0 && occ_coarse() == 0 && _first_all_fine_prts == NULL;
+}
 
 size_t OtherRegionsTable::occupied() const {
   size_t sum = occ_fine();
@@ -929,7 +932,10 @@
 
 void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) {
   assert(nm != NULL, "sanity");
-  _code_roots.remove(nm);
+  assert_locked_or_safepoint(CodeCache_lock);
+
+  _code_roots.remove_lock_free(nm);
+
   // Check that there were no duplicates
   guarantee(!_code_roots.contains(nm), "duplicate entry found");
 }
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -185,6 +185,9 @@
   // objects.
   void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
 
+  // Returns whether this remembered set (and all sub-sets) contain no entries.
+  bool is_empty() const;
+
   size_t occupied() const;
   size_t occ_fine() const;
   size_t occ_coarse() const;
@@ -269,6 +272,10 @@
     return _other_regions.hr();
   }
 
+  bool is_empty() const {
+    return (strong_code_roots_list_length() == 0) && _other_regions.is_empty();
+  }
+
   size_t occupied() {
     MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
     return occupied_locked();
@@ -375,7 +382,7 @@
   void strong_code_roots_do(CodeBlobClosure* blk) const;
 
   // Returns the number of elements in the strong code roots list
-  size_t strong_code_roots_list_length() {
+  size_t strong_code_roots_list_length() const {
     return _code_roots.length();
   }
 
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -119,7 +119,7 @@
 public:
   const char* name() { return _name; }
 
-  uint length() { return _count.length(); }
+  uint length() const { return _count.length(); }
 
   bool is_empty() { return _count.length() == 0; }
 
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -285,37 +285,6 @@
   _par_closures[i] = par_closure;
 }
 
-void SATBMarkQueueSet::iterate_closure_all_threads() {
-  for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure_and_empty(_closure);
-  }
-  shared_satb_queue()->apply_closure_and_empty(_closure);
-}
-
-void SATBMarkQueueSet::par_iterate_closure_all_threads(uint worker) {
-  SharedHeap* sh = SharedHeap::heap();
-  int parity = sh->strong_roots_parity();
-
-  for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
-    }
-  }
-
-  // We also need to claim the VMThread so that its parity is updated
-  // otherwise the next call to Thread::possibly_parallel_oops_do inside
-  // a StrongRootsScope might skip the VMThread because it has a stale
-  // parity that matches the parity set by the StrongRootsScope
-  //
-  // Whichever worker succeeds in claiming the VMThread gets to do
-  // the shared queue.
-
-  VMThread* vmt = VMThread::vm_thread();
-  if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
-  }
-}
-
 bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
                                                               uint worker) {
   BufferNode* nd = NULL;
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -33,7 +33,9 @@
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class Threads;
   friend class SATBMarkQueueSet;
+  friend class G1RemarkThreadsClosure;
 
 private:
   // Filter out unwanted entries from the buffer.
@@ -119,13 +121,6 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // Apply the registered closure to all entries on each
-  // currently-active buffer and then empty the buffer. It should only
-  // be called serially and at a safepoint.
-  void iterate_closure_all_threads();
-  // Parallel version of the above.
-  void par_iterate_closure_all_threads(uint worker);
-
   // If there exists some completed buffer, pop it, then apply the
   // registered closure to all its elements, and return true.  If no
   // completed buffers exist, return false.
--- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -34,6 +34,8 @@
   static_field(HeapRegion, GrainBytes,        size_t)                         \
   static_field(HeapRegion, LogOfHRGrainBytes, int)                            \
                                                                               \
+  nonstatic_field(G1OffsetTableContigSpace, _top,       HeapWord*)            \
+                                                                              \
   nonstatic_field(G1HeapRegionTable, _base,             address)              \
   nonstatic_field(G1HeapRegionTable, _length,           size_t)               \
   nonstatic_field(G1HeapRegionTable, _biased_base,      address)              \
@@ -69,7 +71,8 @@
                                                                               \
   declare_type(G1CollectedHeap, SharedHeap)                                   \
                                                                               \
-  declare_type(HeapRegion, ContiguousSpace)                                   \
+  declare_type(G1OffsetTableContigSpace, CompactibleSpace)                    \
+  declare_type(HeapRegion, G1OffsetTableContigSpace)                          \
   declare_toplevel_type(HeapRegionSeq)                                        \
   declare_toplevel_type(HeapRegionSetBase)                                    \
   declare_toplevel_type(HeapRegionSetCount)                                   \
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -28,12 +28,12 @@
 #include "gc_implementation/parNew/parOopClosures.inline.hpp"
 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
-#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
-#include "gc_implementation/shared/copyFailedInfo.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.inline.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/defNewGeneration.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -251,7 +251,7 @@
         plab->set_word_size(buf_size);
         plab->set_buf(buf_space);
         record_survivor_plab(buf_space, buf_size);
-        obj = plab->allocate(word_sz);
+        obj = plab->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
         // Note that we cannot compare buf_size < word_sz below
         // because of AlignmentReserve (see ParGCAllocBuffer::allocate()).
         assert(obj != NULL || plab->words_remaining() < word_sz,
@@ -613,20 +613,21 @@
 
   KlassScanClosure klass_scan_closure(&par_scan_state.to_space_root_closure(),
                                       gch->rem_set()->klass_rem_set());
-
-  int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
+                                           &par_scan_state.to_space_root_closure(),
+                                           false);
 
   par_scan_state.start_strong_roots();
-  gch->gen_process_strong_roots(_gen->level(),
-                                true,  // Process younger gens, if any,
-                                       // as strong roots.
-                                false, // no scope; this is parallel code
-                                true,  // is scavenging
-                                SharedHeap::ScanningOption(so),
-                                &par_scan_state.to_space_root_closure(),
-                                true,   // walk *all* scavengable nmethods
-                                &par_scan_state.older_gen_closure(),
-                                &klass_scan_closure);
+  gch->gen_process_roots(_gen->level(),
+                         true,  // Process younger gens, if any,
+                                // as strong roots.
+                         false, // no scope; this is parallel code
+                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &par_scan_state.to_space_root_closure(),
+                         &par_scan_state.older_gen_closure(),
+                         &cld_scan_closure);
+
   par_scan_state.end_strong_roots();
 
   // "evacuate followers".
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -69,7 +69,7 @@
   ParScanWithoutBarrierClosure         _to_space_closure; // scan_without_gc_barrier
   ParScanWithBarrierClosure            _old_gen_closure; // scan_with_gc_barrier
   ParRootScanWithoutBarrierClosure     _to_space_root_closure; // scan_root_without_gc_barrier
-  // One of these two will be passed to process_strong_roots, which will
+  // One of these two will be passed to process_roots, which will
   // set its generation.  The first is for two-gen configs where the
   // old gen collects the perm gen; the second is for arbitrary configs.
   // The second isn't used right now (it used to be used for the train, an
@@ -168,7 +168,7 @@
   HeapWord* alloc_in_to_space_slow(size_t word_sz);
 
   HeapWord* alloc_in_to_space(size_t word_sz) {
-    HeapWord* obj = to_space_alloc_buffer()->allocate(word_sz);
+    HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
     if (obj != NULL) return obj;
     else return alloc_in_to_space_slow(word_sz);
   }
--- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -59,7 +59,7 @@
 
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
   CLDToOopClosure mark_and_push_from_clds(&mark_and_push_closure, true);
-  CodeBlobToOopClosure mark_and_push_in_blobs(&mark_and_push_closure, /*do_marking=*/ true);
+  MarkingCodeBlobClosure mark_and_push_in_blobs(&mark_and_push_closure, !CodeBlobToOopClosure::FixRelocations);
 
   if (_java_thread != NULL)
     _java_thread->oops_do(
@@ -100,7 +100,7 @@
     case threads:
     {
       ResourceMark rm;
-      CodeBlobToOopClosure each_active_code_blob(&mark_and_push_closure, /*do_marking=*/ true);
+      MarkingCodeBlobClosure each_active_code_blob(&mark_and_push_closure, !CodeBlobToOopClosure::FixRelocations);
       CLDToOopClosure mark_and_push_from_cld(&mark_and_push_closure);
       Threads::oops_do(&mark_and_push_closure, &mark_and_push_from_cld, &each_active_code_blob);
     }
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -528,14 +528,14 @@
     Universe::oops_do(mark_and_push_closure());
     JNIHandles::oops_do(mark_and_push_closure());   // Global (strong) JNI handles
     CLDToOopClosure mark_and_push_from_cld(mark_and_push_closure());
-    CodeBlobToOopClosure each_active_code_blob(mark_and_push_closure(), /*do_marking=*/ true);
+    MarkingCodeBlobClosure each_active_code_blob(mark_and_push_closure(), !CodeBlobToOopClosure::FixRelocations);
     Threads::oops_do(mark_and_push_closure(), &mark_and_push_from_cld, &each_active_code_blob);
     ObjectSynchronizer::oops_do(mark_and_push_closure());
     FlatProfiler::oops_do(mark_and_push_closure());
     Management::oops_do(mark_and_push_closure());
     JvmtiExport::oops_do(mark_and_push_closure());
     SystemDictionary::always_strong_oops_do(mark_and_push_closure());
-    ClassLoaderDataGraph::always_strong_oops_do(mark_and_push_closure(), follow_klass_closure(), true);
+    ClassLoaderDataGraph::always_strong_cld_do(follow_cld_closure());
     // Do not treat nmethods as strong roots for mark/sweep, since we can unload them.
     //CodeCache::scavenge_root_nmethods_do(CodeBlobToOopClosure(mark_and_push_closure()));
   }
@@ -625,16 +625,16 @@
   FlatProfiler::oops_do(adjust_pointer_closure());
   Management::oops_do(adjust_pointer_closure());
   JvmtiExport::oops_do(adjust_pointer_closure());
-  // SO_AllClasses
   SystemDictionary::oops_do(adjust_pointer_closure());
-  ClassLoaderDataGraph::oops_do(adjust_pointer_closure(), adjust_klass_closure(), true);
+  ClassLoaderDataGraph::cld_do(adjust_cld_closure());
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, adjust_pointer_closure());
 
-  CodeCache::oops_do(adjust_pointer_closure());
+  CodeBlobToOopClosure adjust_from_blobs(adjust_pointer_closure(), CodeBlobToOopClosure::FixRelocations);
+  CodeCache::blobs_do(&adjust_from_blobs);
   StringTable::oops_do(adjust_pointer_closure());
   ref_processor()->weak_oops_do(adjust_pointer_closure());
   PSScavenge::reference_processor()->weak_oops_do(adjust_pointer_closure());
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -40,11 +40,11 @@
   static CollectorCounters*  _counters;
 
   // Closure accessors
-  static OopClosure* mark_and_push_closure() { return &MarkSweep::mark_and_push_closure; }
-  static KlassClosure* follow_klass_closure() { return &MarkSweep::follow_klass_closure; }
-  static VoidClosure* follow_stack_closure() { return (VoidClosure*)&MarkSweep::follow_stack_closure; }
-  static OopClosure* adjust_pointer_closure() { return (OopClosure*)&MarkSweep::adjust_pointer_closure; }
-  static KlassClosure* adjust_klass_closure() { return &MarkSweep::adjust_klass_closure; }
+  static OopClosure* mark_and_push_closure()   { return &MarkSweep::mark_and_push_closure; }
+  static VoidClosure* follow_stack_closure()   { return (VoidClosure*)&MarkSweep::follow_stack_closure; }
+  static CLDClosure* follow_cld_closure()      { return &MarkSweep::follow_cld_closure; }
+  static OopClosure* adjust_pointer_closure()  { return (OopClosure*)&MarkSweep::adjust_pointer_closure; }
+  static CLDClosure* adjust_cld_closure()      { return &MarkSweep::adjust_cld_closure; }
   static BoolObjectClosure* is_alive_closure() { return (BoolObjectClosure*)&MarkSweep::is_alive; }
 
  debug_only(public:)  // Used for PSParallelCompact debugging
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -2465,7 +2465,6 @@
   FlatProfiler::oops_do(adjust_pointer_closure());
   Management::oops_do(adjust_pointer_closure());
   JvmtiExport::oops_do(adjust_pointer_closure());
-  // SO_AllClasses
   SystemDictionary::oops_do(adjust_pointer_closure());
   ClassLoaderDataGraph::oops_do(adjust_pointer_closure(), adjust_klass_closure(), true);
 
@@ -2474,7 +2473,8 @@
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, adjust_pointer_closure());
 
-  CodeCache::oops_do(adjust_pointer_closure());
+  CodeBlobToOopClosure adjust_from_blobs(adjust_pointer_closure(), CodeBlobToOopClosure::FixRelocations);
+  CodeCache::blobs_do(&adjust_from_blobs);
   StringTable::oops_do(adjust_pointer_closure());
   ref_processor()->weak_oops_do(adjust_pointer_closure());
   // Roots were visited so references into the young gen in roots
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_HPP
 
 #include "gc_implementation/parallelScavenge/objectStartArray.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/allocation.hpp"
 
 //
@@ -94,23 +95,9 @@
   PSYoungPromotionLAB() { }
 
   // Not MT safe
-  HeapWord* allocate(size_t size) {
-    // Can't assert this, when young fills, we keep the LAB around, but flushed.
-    // assert(_state != flushed, "Sanity");
-    HeapWord* obj = top();
-    HeapWord* new_top = obj + size;
-    // The 'new_top>obj' check is needed to detect overflow of obj+size.
-    if (new_top > obj && new_top <= end()) {
-      set_top(new_top);
-      assert(is_object_aligned((intptr_t)obj) && is_object_aligned((intptr_t)new_top),
-             "checking alignment");
-      return obj;
-    }
+  inline HeapWord* allocate(size_t size);
 
-    return NULL;
-  }
-
-  debug_only(virtual bool lab_is_valid(MemRegion lab));
+  debug_only(virtual bool lab_is_valid(MemRegion lab);)
 };
 
 class PSOldPromotionLAB : public PSPromotionLAB {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+
+#include "gc_implementation/parallelScavenge/psPromotionLAB.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* PSYoungPromotionLAB::allocate(size_t size) {
+  // Can't assert this, when young fills, we keep the LAB around, but flushed.
+  // assert(_state != flushed, "Sanity");
+  HeapWord* obj = CollectedHeap::align_allocation_or_fail(top(), end(), SurvivorAlignmentInBytes);
+  if (obj == NULL) {
+    return NULL;
+  }
+
+  HeapWord* new_top = obj + size;
+  // The 'new_top>obj' check is needed to detect overflow of obj+size.
+  if (new_top > obj && new_top <= end()) {
+    set_top(new_top);
+    assert(is_ptr_aligned(obj, SurvivorAlignmentInBytes) && is_object_aligned((intptr_t)new_top),
+           "checking alignment");
+    return obj;
+  } else {
+    set_top(obj);
+    return NULL;
+  }
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -27,6 +27,7 @@
 
 #include "gc_implementation/parallelScavenge/psOldGen.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
+#include "gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
 #include "oops/oop.psgc.inline.hpp"
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -65,7 +65,7 @@
     case threads:
     {
       ResourceMark rm;
-      CLDToOopClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
+      CLDClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
       Threads::oops_do(&roots_closure, cld_closure, NULL);
     }
     break;
@@ -100,7 +100,7 @@
 
     case code_cache:
       {
-        CodeBlobToOopClosure each_scavengable_code_blob(&roots_to_old_closure, /*do_marking=*/ true);
+        MarkingCodeBlobClosure each_scavengable_code_blob(&roots_to_old_closure, CodeBlobToOopClosure::FixRelocations);
         CodeCache::scavenge_root_nmethods_do(&each_scavengable_code_blob);
       }
       break;
@@ -122,8 +122,8 @@
 
   PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
   PSScavengeRootsClosure roots_closure(pm);
-  CLDToOopClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
-  CodeBlobToOopClosure roots_in_blobs(&roots_closure, /*do_marking=*/ true);
+  CLDClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
+  MarkingCodeBlobClosure roots_in_blobs(&roots_closure, CodeBlobToOopClosure::FixRelocations);
 
   if (_java_thread != NULL)
     _java_thread->oops_do(&roots_closure, roots_from_clds, &roots_in_blobs);
--- a/src/share/vm/gc_implementation/shared/markSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/markSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -49,27 +49,19 @@
 SerialOldTracer*        MarkSweep::_gc_tracer       = NULL;
 
 MarkSweep::FollowRootClosure  MarkSweep::follow_root_closure;
-CodeBlobToOopClosure MarkSweep::follow_code_root_closure(&MarkSweep::follow_root_closure, /*do_marking=*/ true);
 
 void MarkSweep::FollowRootClosure::do_oop(oop* p)       { follow_root(p); }
 void MarkSweep::FollowRootClosure::do_oop(narrowOop* p) { follow_root(p); }
 
 MarkSweep::MarkAndPushClosure MarkSweep::mark_and_push_closure;
-MarkSweep::FollowKlassClosure MarkSweep::follow_klass_closure;
-MarkSweep::AdjustKlassClosure MarkSweep::adjust_klass_closure;
+CLDToOopClosure               MarkSweep::follow_cld_closure(&mark_and_push_closure);
+CLDToOopClosure               MarkSweep::adjust_cld_closure(&adjust_pointer_closure);
 
 void MarkSweep::MarkAndPushClosure::do_oop(oop* p)       { mark_and_push(p); }
 void MarkSweep::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(p); }
 
-void MarkSweep::FollowKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(&MarkSweep::mark_and_push_closure);
-}
-void MarkSweep::AdjustKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(&MarkSweep::adjust_pointer_closure);
-}
-
 void MarkSweep::follow_class_loader(ClassLoaderData* cld) {
-  cld->oops_do(&MarkSweep::mark_and_push_closure, &MarkSweep::follow_klass_closure, true);
+  MarkSweep::follow_cld_closure.do_cld(cld);
 }
 
 void MarkSweep::follow_stack() {
--- a/src/share/vm/gc_implementation/shared/markSweep.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/markSweep.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -65,17 +65,6 @@
     virtual void do_oop(narrowOop* p);
   };
 
-  // The one and only place to start following the classes.
-  // Should only be applied to the ClassLoaderData klasses list.
-  class FollowKlassClosure : public KlassClosure {
-   public:
-    void do_klass(Klass* klass);
-  };
-  class AdjustKlassClosure : public KlassClosure {
-   public:
-    void do_klass(Klass* klass);
-  };
-
   class FollowStackClosure: public VoidClosure {
    public:
     virtual void do_void();
@@ -143,12 +132,11 @@
   // Public closures
   static IsAliveClosure       is_alive;
   static FollowRootClosure    follow_root_closure;
-  static CodeBlobToOopClosure follow_code_root_closure; // => follow_root_closure
   static MarkAndPushClosure   mark_and_push_closure;
-  static FollowKlassClosure   follow_klass_closure;
   static FollowStackClosure   follow_stack_closure;
+  static CLDToOopClosure      follow_cld_closure;
   static AdjustPointerClosure adjust_pointer_closure;
-  static AdjustKlassClosure   adjust_klass_closure;
+  static CLDToOopClosure      adjust_cld_closure;
 
   // Accessors
   static uint total_invocations() { return _total_invocations; }
--- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -24,7 +24,7 @@
 
 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-
+#include "gc_interface/collectedHeap.hpp"
 #include "memory/allocation.hpp"
 #include "memory/blockOffsetTable.hpp"
 #include "memory/threadLocalAllocBuffer.hpp"
@@ -84,6 +84,9 @@
     }
   }
 
+  // Allocate the object aligned to "alignment_in_bytes".
+  HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes);
+
   // Undo the last allocation in the buffer, which is required to be of the
   // "obj" of the given "word_sz".
   void undo_allocation(HeapWord* obj, size_t word_sz) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* ParGCAllocBuffer::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) {
+
+  HeapWord* res = CollectedHeap::align_allocation_or_fail(_top, _end, alignment_in_bytes);
+  if (res == NULL) {
+    return NULL;
+  }
+
+  // Set _top so that allocate(), which expects _top to be correctly set,
+  // can be used below.
+  _top = res;
+  return allocate(word_sz);
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -209,6 +209,45 @@
   gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level);
 }
 
+// Returns true iff concurrent GCs unloads metadata.
+bool VM_CollectForMetadataAllocation::initiate_concurrent_GC() {
+#if INCLUDE_ALL_GCS
+  if (UseConcMarkSweepGC && CMSClassUnloadingEnabled) {
+    MetaspaceGC::set_should_concurrent_collect(true);
+    return true;
+  }
+
+  if (UseG1GC && ClassUnloadingWithConcurrentMark) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    g1h->g1_policy()->set_initiate_conc_mark_if_possible();
+
+    GCCauseSetter x(g1h, _gc_cause);
+
+    // At this point we are supposed to start a concurrent cycle. We
+    // will do so if one is not already in progress.
+    bool should_start = g1h->g1_policy()->force_initial_mark_if_outside_cycle(_gc_cause);
+
+    if (should_start) {
+      double pause_target = g1h->g1_policy()->max_pause_time_ms();
+      g1h->do_collection_pause_at_safepoint(pause_target);
+    }
+    return true;
+  }
+#endif
+
+  return false;
+}
+
+static void log_metaspace_alloc_failure_for_concurrent_GC() {
+  if (Verbose && PrintGCDetails) {
+    if (UseConcMarkSweepGC) {
+      gclog_or_tty->print_cr("\nCMS full GC for Metaspace");
+    } else if (UseG1GC) {
+      gclog_or_tty->print_cr("\nG1 full GC for Metaspace");
+    }
+  }
+}
+
 void VM_CollectForMetadataAllocation::doit() {
   SvcGCMarker sgcm(SvcGCMarker::FULL);
 
@@ -220,54 +259,57 @@
   // a GC that freed space for the allocation.
   if (!MetadataAllocationFailALot) {
     _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-  }
-
-  if (_result == NULL) {
-    if (UseConcMarkSweepGC) {
-      if (CMSClassUnloadingEnabled) {
-        MetaspaceGC::set_should_concurrent_collect(true);
-      }
-      // For CMS expand since the collection is going to be concurrent.
-      _result =
-        _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
-    }
-    if (_result == NULL) {
-      // Don't clear the soft refs yet.
-      if (Verbose && PrintGCDetails && UseConcMarkSweepGC) {
-        gclog_or_tty->print_cr("\nCMS full GC for Metaspace");
-      }
-      heap->collect_as_vm_thread(GCCause::_metadata_GC_threshold);
-      // After a GC try to allocate without expanding.  Could fail
-      // and expansion will be tried below.
-      _result =
-        _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-    }
-    if (_result == NULL) {
-      // If still failing, allow the Metaspace to expand.
-      // See delta_capacity_until_GC() for explanation of the
-      // amount of the expansion.
-      // This should work unless there really is no more space
-      // or a MaxMetaspaceSize has been specified on the command line.
-      _result =
-        _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
-      if (_result == NULL) {
-        // If expansion failed, do a last-ditch collection and try allocating
-        // again.  A last-ditch collection will clear softrefs.  This
-        // behavior is similar to the last-ditch collection done for perm
-        // gen when it was full and a collection for failed allocation
-        // did not free perm gen space.
-        heap->collect_as_vm_thread(GCCause::_last_ditch_collection);
-        _result =
-          _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-      }
-    }
-    if (Verbose && PrintGCDetails && _result == NULL) {
-      gclog_or_tty->print_cr("\nAfter Metaspace GC failed to allocate size "
-                             SIZE_FORMAT, _size);
+    if (_result != NULL) {
+      return;
     }
   }
 
-  if (_result == NULL && GC_locker::is_active_and_needs_gc()) {
+  if (initiate_concurrent_GC()) {
+    // For CMS and G1 expand since the collection is going to be concurrent.
+    _result = _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
+    if (_result != NULL) {
+      return;
+    }
+
+    log_metaspace_alloc_failure_for_concurrent_GC();
+  }
+
+  // Don't clear the soft refs yet.
+  heap->collect_as_vm_thread(GCCause::_metadata_GC_threshold);
+  // After a GC try to allocate without expanding.  Could fail
+  // and expansion will be tried below.
+  _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  // If still failing, allow the Metaspace to expand.
+  // See delta_capacity_until_GC() for explanation of the
+  // amount of the expansion.
+  // This should work unless there really is no more space
+  // or a MaxMetaspaceSize has been specified on the command line.
+  _result = _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  // If expansion failed, do a last-ditch collection and try allocating
+  // again.  A last-ditch collection will clear softrefs.  This
+  // behavior is similar to the last-ditch collection done for perm
+  // gen when it was full and a collection for failed allocation
+  // did not free perm gen space.
+  heap->collect_as_vm_thread(GCCause::_last_ditch_collection);
+  _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  if (Verbose && PrintGCDetails) {
+    gclog_or_tty->print_cr("\nAfter Metaspace GC failed to allocate size "
+                           SIZE_FORMAT, _size);
+  }
+
+  if (GC_locker::is_active_and_needs_gc()) {
     set_gc_locked();
   }
 }
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -217,6 +217,8 @@
   virtual VMOp_Type type() const { return VMOp_CollectForMetadataAllocation; }
   virtual void doit();
   MetaWord* result() const       { return _result; }
+
+  bool initiate_concurrent_GC();
 };
 
 class SvcGCMarker : public StackObj {
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -351,6 +351,12 @@
     fill_with_object(start, pointer_delta(end, start), zap);
   }
 
+  // Return the address "addr" aligned by "alignment_in_bytes" if such
+  // an address is below "end".  Return NULL otherwise.
+  inline static HeapWord* align_allocation_or_fail(HeapWord* addr,
+                                                   HeapWord* end,
+                                                   unsigned short alignment_in_bytes);
+
   // Some heaps may offer a contiguous region for shared non-blocking
   // allocation, via inlined code (by exporting the address of the top and
   // end fields defining the extent of the contiguous allocation region.)
--- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -241,6 +241,44 @@
   oop_iterate(&no_header_cl);
 }
 
+
+inline HeapWord* CollectedHeap::align_allocation_or_fail(HeapWord* addr,
+                                                         HeapWord* end,
+                                                         unsigned short alignment_in_bytes) {
+  if (alignment_in_bytes <= ObjectAlignmentInBytes) {
+    return addr;
+  }
+
+  assert(is_ptr_aligned(addr, HeapWordSize),
+    err_msg("Address " PTR_FORMAT " is not properly aligned.", p2i(addr)));
+  assert(is_size_aligned(alignment_in_bytes, HeapWordSize),
+    err_msg("Alignment size %u is incorrect.", alignment_in_bytes));
+
+  HeapWord* new_addr = (HeapWord*) align_pointer_up(addr, alignment_in_bytes);
+  size_t padding = pointer_delta(new_addr, addr);
+
+  if (padding == 0) {
+    return addr;
+  }
+
+  if (padding < CollectedHeap::min_fill_size()) {
+    padding += alignment_in_bytes / HeapWordSize;
+    assert(padding >= CollectedHeap::min_fill_size(),
+      err_msg("alignment_in_bytes %u is expect to be larger "
+      "than the minimum object size", alignment_in_bytes));
+    new_addr = addr + padding;
+  }
+
+  assert(new_addr > addr, err_msg("Unexpected arithmetic overflow "
+    PTR_FORMAT " not greater than " PTR_FORMAT, p2i(new_addr), p2i(addr)));
+  if(new_addr < end) {
+    CollectedHeap::fill_with_object(addr, padding);
+    return new_addr;
+  } else {
+    return NULL;
+  }
+}
+
 #ifndef PRODUCT
 
 inline bool
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -429,7 +429,7 @@
                                                                  OopsInGenClosure* cl,
                                                                  CardTableRS* ct) {
   if (!mr.is_empty()) {
-    // Caller (process_strong_roots()) claims that all GC threads
+    // Caller (process_roots()) claims that all GC threads
     // execute this call.  With UseDynamicNumberOfGCThreads now all
     // active GC threads execute this call.  The number of active GC
     // threads needs to be passed to par_non_clean_card_iterate_work()
@@ -438,7 +438,7 @@
     // This is an example of where n_par_threads() is used instead
     // of workers()->active_workers().  n_par_threads can be set to 0 to
     // turn off parallelism.  For example when this code is called as
-    // part of verification and SharedHeap::process_strong_roots() is being
+    // part of verification and SharedHeap::process_roots() is being
     // used, then n_par_threads() may have been set to 0.  active_workers
     // is not overloaded with the meaning that it is a switch to disable
     // parallelism and so keeps the meaning of the number of
--- a/src/share/vm/memory/defNewGeneration.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/defNewGeneration.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -613,6 +613,9 @@
 
   KlassScanClosure klass_scan_closure(&fsc_with_no_gc_barrier,
                                       gch->rem_set()->klass_rem_set());
+  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
+                                           &fsc_with_no_gc_barrier,
+                                           false);
 
   set_promo_failure_scan_stack_closure(&fsc_with_no_gc_barrier);
   FastEvacuateFollowersClosure evacuate_followers(gch, _level, this,
@@ -622,18 +625,15 @@
   assert(gch->no_allocs_since_save_marks(0),
          "save marks have not been newly set.");
 
-  int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
-
-  gch->gen_process_strong_roots(_level,
-                                true,  // Process younger gens, if any,
-                                       // as strong roots.
-                                true,  // activate StrongRootsScope
-                                true,  // is scavenging
-                                SharedHeap::ScanningOption(so),
-                                &fsc_with_no_gc_barrier,
-                                true,   // walk *all* scavengable nmethods
-                                &fsc_with_gc_barrier,
-                                &klass_scan_closure);
+  gch->gen_process_roots(_level,
+                         true,  // Process younger gens, if any,
+                                // as strong roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &fsc_with_no_gc_barrier,
+                         &fsc_with_gc_barrier,
+                         &cld_scan_closure);
 
   // "evacuate followers".
   evacuate_followers.do_void();
@@ -789,7 +789,7 @@
 
   // Try allocating obj in to-space (unless too old)
   if (old->age() < tenuring_threshold()) {
-    obj = (oop) to()->allocate(s);
+    obj = (oop) to()->allocate_aligned(s);
   }
 
   // Otherwise try allocating obj tenured
--- a/src/share/vm/memory/genCollectedHeap.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -61,8 +61,8 @@
 GenCollectedHeap* GenCollectedHeap::_gch;
 NOT_PRODUCT(size_t GenCollectedHeap::_skip_header_HeapWords = 0;)
 
-// The set of potentially parallel tasks in strong root scanning.
-enum GCH_process_strong_roots_tasks {
+// The set of potentially parallel tasks in root scanning.
+enum GCH_strong_roots_tasks {
   // We probably want to parallelize both of these internally, but for now...
   GCH_PS_younger_gens,
   // Leave this one last.
@@ -72,11 +72,11 @@
 GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) :
   SharedHeap(policy),
   _gen_policy(policy),
-  _gen_process_strong_tasks(new SubTasksDone(GCH_PS_NumElements)),
+  _gen_process_roots_tasks(new SubTasksDone(GCH_PS_NumElements)),
   _full_collections_completed(0)
 {
-  if (_gen_process_strong_tasks == NULL ||
-      !_gen_process_strong_tasks->valid()) {
+  if (_gen_process_roots_tasks == NULL ||
+      !_gen_process_roots_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
   }
   assert(policy != NULL, "Sanity check");
@@ -590,33 +590,29 @@
 
 void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
-  _gen_process_strong_tasks->set_n_threads(t);
+  _gen_process_roots_tasks->set_n_threads(t);
 }
 
 void GenCollectedHeap::
-gen_process_strong_roots(int level,
-                         bool younger_gens_as_roots,
-                         bool activate_scope,
-                         bool is_scavenging,
-                         SharedHeap::ScanningOption so,
-                         OopsInGenClosure* not_older_gens,
-                         bool do_code_roots,
-                         OopsInGenClosure* older_gens,
-                         KlassClosure* klass_closure) {
-  // General strong roots.
+gen_process_roots(int level,
+                  bool younger_gens_as_roots,
+                  bool activate_scope,
+                  SharedHeap::ScanningOption so,
+                  OopsInGenClosure* not_older_gens,
+                  OopsInGenClosure* weak_roots,
+                  OopsInGenClosure* older_gens,
+                  CLDClosure* cld_closure,
+                  CLDClosure* weak_cld_closure,
+                  CodeBlobClosure* code_closure) {
 
-  if (!do_code_roots) {
-    SharedHeap::process_strong_roots(activate_scope, is_scavenging, so,
-                                     not_older_gens, NULL, klass_closure);
-  } else {
-    bool do_code_marking = (activate_scope || nmethod::oops_do_marking_is_active());
-    CodeBlobToOopClosure code_roots(not_older_gens, /*do_marking=*/ do_code_marking);
-    SharedHeap::process_strong_roots(activate_scope, is_scavenging, so,
-                                     not_older_gens, &code_roots, klass_closure);
-  }
+  // General roots.
+  SharedHeap::process_roots(activate_scope, so,
+                            not_older_gens, weak_roots,
+                            cld_closure, weak_cld_closure,
+                            code_closure);
 
   if (younger_gens_as_roots) {
-    if (!_gen_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
+    if (!_gen_process_roots_tasks->is_task_claimed(GCH_PS_younger_gens)) {
       for (int i = 0; i < level; i++) {
         not_older_gens->set_generation(_gens[i]);
         _gens[i]->oop_iterate(not_older_gens);
@@ -632,12 +628,42 @@
     older_gens->reset_generation();
   }
 
-  _gen_process_strong_tasks->all_tasks_completed();
+  _gen_process_roots_tasks->all_tasks_completed();
 }
 
-void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure,
-                                              CodeBlobClosure* code_roots) {
-  SharedHeap::process_weak_roots(root_closure, code_roots);
+void GenCollectedHeap::
+gen_process_roots(int level,
+                  bool younger_gens_as_roots,
+                  bool activate_scope,
+                  SharedHeap::ScanningOption so,
+                  bool only_strong_roots,
+                  OopsInGenClosure* not_older_gens,
+                  OopsInGenClosure* older_gens,
+                  CLDClosure* cld_closure) {
+
+  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
+
+  bool is_moving_collection = false;
+  if (level == 0 || is_adjust_phase) {
+    // young collections are always moving
+    is_moving_collection = true;
+  }
+
+  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
+  CodeBlobClosure* code_closure = &mark_code_closure;
+
+  gen_process_roots(level,
+                    younger_gens_as_roots,
+                    activate_scope, so,
+                    not_older_gens, only_strong_roots ? NULL : not_older_gens,
+                    older_gens,
+                    cld_closure, only_strong_roots ? NULL : cld_closure,
+                    code_closure);
+
+}
+
+void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure) {
+  SharedHeap::process_weak_roots(root_closure);
   // "Local" "weak" refs
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->ref_processor()->weak_oops_do(root_closure);
@@ -856,12 +882,6 @@
   }
 }
 
-void GenCollectedHeap::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  for (int i = 0; i < _n_gens; i++) {
-    _gens[i]->oop_iterate(mr, cl);
-  }
-}
-
 void GenCollectedHeap::object_iterate(ObjectClosure* cl) {
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->object_iterate(cl);
@@ -1079,7 +1099,7 @@
   guarantee(_n_gens = 2, "Wrong number of generations");
   Generation* old_gen = _gens[1];
   // Start by compacting into same gen.
-  CompactPoint cp(old_gen, NULL, NULL);
+  CompactPoint cp(old_gen);
   old_gen->prepare_for_compaction(&cp);
   Generation* young_gen = _gens[0];
   young_gen->prepare_for_compaction(&cp);
--- a/src/share/vm/memory/genCollectedHeap.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -78,9 +78,9 @@
   unsigned int _full_collections_completed;
 
   // Data structure for claiming the (potentially) parallel tasks in
-  // (gen-specific) strong roots processing.
-  SubTasksDone* _gen_process_strong_tasks;
-  SubTasksDone* gen_process_strong_tasks() { return _gen_process_strong_tasks; }
+  // (gen-specific) roots processing.
+  SubTasksDone* _gen_process_roots_tasks;
+  SubTasksDone* gen_process_roots_tasks() { return _gen_process_roots_tasks; }
 
   // In block contents verification, the number of header words to skip
   NOT_PRODUCT(static size_t _skip_header_HeapWords;)
@@ -220,7 +220,6 @@
 
   // Iteration functions.
   void oop_iterate(ExtendedOopClosure* cl);
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void object_iterate(ObjectClosure* cl);
   void safe_object_iterate(ObjectClosure* cl);
   Space* space_containing(const void* addr) const;
@@ -412,26 +411,35 @@
   // The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
-  // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
-  // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Strings" applies the closure to all entries in the StringTable.
-  void gen_process_strong_roots(int level,
-                                bool younger_gens_as_roots,
-                                // The remaining arguments are in an order
-                                // consistent with SharedHeap::process_strong_roots:
-                                bool activate_scope,
-                                bool is_scavenging,
-                                SharedHeap::ScanningOption so,
-                                OopsInGenClosure* not_older_gens,
-                                bool do_code_roots,
-                                OopsInGenClosure* older_gens,
-                                KlassClosure* klass_closure);
+ private:
+  void gen_process_roots(int level,
+                         bool younger_gens_as_roots,
+                         bool activate_scope,
+                         SharedHeap::ScanningOption so,
+                         OopsInGenClosure* not_older_gens,
+                         OopsInGenClosure* weak_roots,
+                         OopsInGenClosure* older_gens,
+                         CLDClosure* cld_closure,
+                         CLDClosure* weak_cld_closure,
+                         CodeBlobClosure* code_closure);
 
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table, and referents of reachable weak refs.
-  void gen_process_weak_roots(OopClosure* root_closure,
-                              CodeBlobClosure* code_roots);
+ public:
+  static const bool StrongAndWeakRoots = false;
+  static const bool StrongRootsOnly    = true;
+
+  void gen_process_roots(int level,
+                         bool younger_gens_as_roots,
+                         bool activate_scope,
+                         SharedHeap::ScanningOption so,
+                         bool only_strong_roots,
+                         OopsInGenClosure* not_older_gens,
+                         OopsInGenClosure* older_gens,
+                         CLDClosure* cld_closure);
+
+  // Apply "root_closure" to all the weak roots of the system.
+  // These include JNI weak roots, string table,
+  // and referents of reachable weak refs.
+  void gen_process_weak_roots(OopClosure* root_closure);
 
   // Set the saved marks of generations, if that makes sense.
   // In particular, if any generation might iterate over the oops
--- a/src/share/vm/memory/genMarkSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/genMarkSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -207,15 +207,14 @@
   // Need new claim bits before marking starts.
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  gch->gen_process_strong_roots(level,
-                                false, // Younger gens are not roots.
-                                true,  // activate StrongRootsScope
-                                false, // not scavenging
-                                SharedHeap::SO_SystemClasses,
-                                &follow_root_closure,
-                                true,   // walk code active on stacks
-                                &follow_root_closure,
-                                &follow_klass_closure);
+  gch->gen_process_roots(level,
+                         false, // Younger gens are not roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_None,
+                         GenCollectedHeap::StrongRootsOnly,
+                         &follow_root_closure,
+                         &follow_root_closure,
+                         &follow_cld_closure);
 
   // Process reference objects found during marking
   {
@@ -293,22 +292,16 @@
   // are run.
   adjust_pointer_closure.set_orig_generation(gch->get_gen(level));
 
-  gch->gen_process_strong_roots(level,
-                                false, // Younger gens are not roots.
-                                true,  // activate StrongRootsScope
-                                false, // not scavenging
-                                SharedHeap::SO_AllClasses,
-                                &adjust_pointer_closure,
-                                false, // do not walk code
-                                &adjust_pointer_closure,
-                                &adjust_klass_closure);
+  gch->gen_process_roots(level,
+                         false, // Younger gens are not roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_AllCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &adjust_pointer_closure,
+                         &adjust_pointer_closure,
+                         &adjust_cld_closure);
 
-  // Now adjust pointers in remaining weak roots.  (All of which should
-  // have been cleared if they pointed to non-surviving objects.)
-  CodeBlobToOopClosure adjust_code_pointer_closure(&adjust_pointer_closure,
-                                                   /*do_marking=*/ false);
-  gch->gen_process_weak_roots(&adjust_pointer_closure,
-                              &adjust_code_pointer_closure);
+  gch->gen_process_weak_roots(&adjust_pointer_closure);
 
   adjust_marks();
   GenAdjustPointersClosure blk;
--- a/src/share/vm/memory/generation.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/generation.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -297,22 +297,16 @@
 
 class GenerationOopIterateClosure : public SpaceClosure {
  public:
-  ExtendedOopClosure* cl;
-  MemRegion mr;
+  ExtendedOopClosure* _cl;
   virtual void do_space(Space* s) {
-    s->oop_iterate(mr, cl);
+    s->oop_iterate(_cl);
   }
-  GenerationOopIterateClosure(ExtendedOopClosure* _cl, MemRegion _mr) :
-    cl(_cl), mr(_mr) {}
+  GenerationOopIterateClosure(ExtendedOopClosure* cl) :
+    _cl(cl) {}
 };
 
 void Generation::oop_iterate(ExtendedOopClosure* cl) {
-  GenerationOopIterateClosure blk(cl, _reserved);
-  space_iterate(&blk);
-}
-
-void Generation::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  GenerationOopIterateClosure blk(cl, mr);
+  GenerationOopIterateClosure blk(cl);
   space_iterate(&blk);
 }
 
--- a/src/share/vm/memory/generation.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/generation.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -543,10 +543,6 @@
   // generation, calling "cl.do_oop" on each.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to the intersection of a memory region and
-  // the generation.
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // Iterate over all objects in the generation, calling "cl.do_object" on
   // each.
   virtual void object_iterate(ObjectClosure* cl);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/guardedMemory.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/guardedMemory.hpp"
+#include "runtime/os.hpp"
+
+void* GuardedMemory::wrap_copy(const void* ptr, const size_t len, const void* tag) {
+  size_t total_sz = GuardedMemory::get_total_size(len);
+  void* outerp = os::malloc(total_sz, mtInternal);
+  if (outerp != NULL) {
+    GuardedMemory guarded(outerp, len, tag);
+    void* innerp = guarded.get_user_ptr();
+    memcpy(innerp, ptr, len);
+    return innerp;
+  }
+  return NULL; // OOM
+}
+
+bool GuardedMemory::free_copy(void* p) {
+  if (p == NULL) {
+    return true;
+  }
+  GuardedMemory guarded((u_char*)p);
+  bool verify_ok = guarded.verify_guards();
+
+  /* always attempt to free, pass problem on to any nested memchecker */
+  os::free(guarded.release_for_freeing());
+
+  return verify_ok;
+}
+
+void GuardedMemory::print_on(outputStream* st) const {
+  if (_base_addr == NULL) {
+    st->print_cr("GuardedMemory(" PTR_FORMAT ") not associated to any memory", p2i(this));
+    return;
+  }
+  st->print_cr("GuardedMemory(" PTR_FORMAT ") base_addr=" PTR_FORMAT
+      " tag=" PTR_FORMAT " user_size=" SIZE_FORMAT " user_data=" PTR_FORMAT,
+      p2i(this), p2i(_base_addr), p2i(get_tag()), get_user_size(), p2i(get_user_ptr()));
+
+  Guard* guard = get_head_guard();
+  st->print_cr("  Header guard @" PTR_FORMAT " is %s", p2i(guard), (guard->verify() ? "OK" : "BROKEN"));
+  guard = get_tail_guard();
+  st->print_cr("  Trailer guard @" PTR_FORMAT " is %s", p2i(guard), (guard->verify() ? "OK" : "BROKEN"));
+
+  u_char udata = *get_user_ptr();
+  switch (udata) {
+  case uninitBlockPad:
+    st->print_cr("  User data appears unused");
+    break;
+  case freeBlockPad:
+    st->print_cr("  User data appears to have been freed");
+    break;
+  default:
+    st->print_cr("  User data appears to be in use");
+    break;
+  }
+}
+
+// test code...
+
+#ifndef PRODUCT
+
+static void guarded_memory_test_check(void* p, size_t sz, void* tag) {
+  assert(p != NULL, "NULL pointer given to check");
+  u_char* c = (u_char*) p;
+  GuardedMemory guarded(c);
+  assert(guarded.get_tag() == tag, "Tag is not the same as supplied");
+  assert(guarded.get_user_ptr() == c, "User pointer is not the same as supplied");
+  assert(guarded.get_user_size() == sz, "User size is not the same as supplied");
+  assert(guarded.verify_guards(), "Guard broken");
+}
+
+void GuardedMemory::test_guarded_memory() {
+  // Test the basic characteristics...
+  size_t total_sz = GuardedMemory::get_total_size(1);
+  assert(total_sz > 1 && total_sz >= (sizeof(GuardHeader) + 1 + sizeof(Guard)), "Unexpected size");
+  u_char* basep = (u_char*) os::malloc(total_sz, mtInternal);
+
+  GuardedMemory guarded(basep, 1, (void*)0xf000f000);
+
+  assert(*basep == badResourceValue, "Expected guard in the form of badResourceValue");
+  u_char* userp = guarded.get_user_ptr();
+  assert(*userp == uninitBlockPad, "Expected uninitialized data in the form of uninitBlockPad");
+  guarded_memory_test_check(userp, 1, (void*)0xf000f000);
+
+  void* freep = guarded.release_for_freeing();
+  assert((u_char*)freep == basep, "Expected the same pointer guard was ");
+  assert(*userp == freeBlockPad, "Expected user data to be free block padded");
+  assert(!guarded.verify_guards(), "Expected failed");
+  os::free(freep);
+
+  // Test a number of odd sizes...
+  size_t sz = 0;
+  do {
+    void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal);
+    void* up = guarded.wrap_with_guards(p, sz, (void*)1);
+    memset(up, 0, sz);
+    guarded_memory_test_check(up, sz, (void*)1);
+    os::free(guarded.release_for_freeing());
+    sz = (sz << 4) + 1;
+  } while (sz < (256 * 1024));
+
+  // Test buffer overrun into head...
+  basep = (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal);
+  guarded.wrap_with_guards(basep, 1);
+  *basep = 0;
+  assert(!guarded.verify_guards(), "Expected failure");
+  os::free(basep);
+
+  // Test buffer overrun into tail with a number of odd sizes...
+  sz = 1;
+  do {
+    void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal);
+    void* up = guarded.wrap_with_guards(p, sz, (void*)1);
+    memset(up, 0, sz + 1); // Buffer-overwrite (within guard)
+    assert(!guarded.verify_guards(), "Guard was not broken as expected");
+    os::free(guarded.release_for_freeing());
+    sz = (sz << 4) + 1;
+  } while (sz < (256 * 1024));
+
+  // Test wrap_copy/wrap_free...
+  assert(GuardedMemory::free_copy(NULL), "Expected free NULL to be OK");
+
+  const char* str = "Check my bounds out";
+  size_t str_sz = strlen(str) + 1;
+  char* str_copy = (char*) GuardedMemory::wrap_copy(str, str_sz);
+  guarded_memory_test_check(str_copy, str_sz, NULL);
+  assert(strcmp(str, str_copy) == 0, "Not identical copy");
+  assert(GuardedMemory::free_copy(str_copy), "Free copy failed to verify");
+
+  void* no_data = NULL;
+  void* no_data_copy = GuardedMemory::wrap_copy(no_data, 0);
+  assert(GuardedMemory::free_copy(no_data_copy), "Expected valid guards even for no data copy");
+}
+
+#endif // !PRODUCT
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/guardedMemory.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
+#define SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+/**
+ * Guarded memory for detecting buffer overrun.
+ *
+ * Allows allocations to be wrapped with padded bytes of a known byte pattern,
+ * that is a "guard". Guard patterns may be verified to detect buffer overruns.
+ *
+ * Primarily used by "debug malloc" and "checked JNI".
+ *
+ * Memory layout:
+ *
+ * |Offset             | Content              | Description    |
+ * |------------------------------------------------------------
+ * |base_addr          | 0xABABABABABABABAB   | Head guard     |
+ * |+16                | <size_t:user_size>   | User data size |
+ * |+sizeof(uintptr_t) | <tag>                | Tag word       |
+ * |+sizeof(void*)     | 0xF1 <user_data> (   | User data      |
+ * |+user_size         | 0xABABABABABABABAB   | Tail guard     |
+ * -------------------------------------------------------------
+ *
+ * Where:
+ *  - guard padding uses "badResourceValue" (0xAB)
+ *  - tag word is general purpose
+ *  - user data
+ *    -- initially padded with "uninitBlockPad" (0xF1),
+ *    -- to "freeBlockPad" (0xBA), when freed
+ *
+ * Usage:
+ *
+ * * Allocations: one may wrap allocations with guard memory:
+ * <code>
+ *   Thing* alloc_thing() {
+ *     void* mem = user_alloc_fn(GuardedMemory::get_total_size(sizeof(thing)));
+ *     GuardedMemory guarded(mem, sizeof(thing));
+ *     return (Thing*) guarded.get_user_ptr();
+ *   }
+ * </code>
+ * * Verify: memory guards are still in tact
+ * <code>
+ *   bool verify_thing(Thing* thing) {
+ *     GuardedMemory guarded((void*)thing);
+ *     return guarded.verify_guards();
+ *   }
+ * </code>
+ * * Free: one may mark bytes as freed (further debugging support)
+ * <code>
+ *   void free_thing(Thing* thing) {
+ *    GuardedMemory guarded((void*)thing);
+ *    assert(guarded.verify_guards(), "Corrupt thing");
+ *    user_free_fn(guards.release_for_freeing();
+ *   }
+ * </code>
+ */
+class GuardedMemory : StackObj { // Wrapper on stack
+
+  // Private inner classes for memory layout...
+
+protected:
+
+  /**
+   * Guard class for header and trailer known pattern to test for overwrites.
+   */
+  class Guard { // Class for raw memory (no vtbl allowed)
+    friend class GuardedMemory;
+   protected:
+    enum {
+      GUARD_SIZE = 16
+    };
+
+    u_char _guard[GUARD_SIZE];
+
+   public:
+
+    void build() {
+      u_char* c = _guard; // Possibly unaligned if tail guard
+      u_char* end = c + GUARD_SIZE;
+      while (c < end) {
+        *c = badResourceValue;
+        c++;
+      }
+    }
+
+    bool verify() const {
+      u_char* c = (u_char*) _guard;
+      u_char* end = c + GUARD_SIZE;
+      while (c < end) {
+        if (*c != badResourceValue) {
+          return false;
+        }
+        c++;
+      }
+      return true;
+    }
+
+  }; // GuardedMemory::Guard
+
+  /**
+   * Header guard and size
+   */
+  class GuardHeader : Guard {
+    friend class GuardedMemory;
+   protected:
+    // Take care in modifying fields here, will effect alignment
+    // e.g. x86 ABI 16 byte stack alignment
+    union {
+      uintptr_t __unused_full_word1;
+      size_t _user_size;
+    };
+    void* _tag;
+   public:
+    void set_user_size(const size_t usz) { _user_size = usz; }
+    size_t get_user_size() const { return _user_size; }
+
+    void set_tag(const void* tag) { _tag = (void*) tag; }
+    void* get_tag() const { return _tag; }
+
+  }; // GuardedMemory::GuardHeader
+
+  // Guarded Memory...
+
+ protected:
+  u_char* _base_addr;
+
+ public:
+
+  /**
+   * Create new guarded memory.
+   *
+   * Wraps, starting at the given "base_ptr" with guards. Use "get_user_ptr()"
+   * to return a pointer suitable for user data.
+   *
+   * @param base_ptr  allocation wishing to be wrapped, must be at least "GuardedMemory::get_total_size()" bytes.
+   * @param user_size the size of the user data to be wrapped.
+   * @param tag       optional general purpose tag.
+   */
+  GuardedMemory(void* base_ptr, const size_t user_size, const void* tag = NULL) {
+    wrap_with_guards(base_ptr, user_size, tag);
+  }
+
+  /**
+   * Wrap existing guarded memory.
+   *
+   * To use this constructor, one must have created guarded memory with
+   * "GuardedMemory(void*, size_t, void*)" (or indirectly via helper, e.g. "wrap_copy()").
+   *
+   * @param user_p  existing wrapped memory.
+   */
+  GuardedMemory(void* userp) {
+    u_char* user_ptr = (u_char*) userp;
+    assert((uintptr_t)user_ptr > (sizeof(GuardHeader) + 0x1000), "Invalid pointer");
+    _base_addr = (user_ptr - sizeof(GuardHeader));
+  }
+
+  /**
+   * Create new guarded memory.
+   *
+   * Wraps, starting at the given "base_ptr" with guards. Allows reuse of stack allocated helper.
+   *
+   * @param base_ptr  allocation wishing to be wrapped, must be at least "GuardedMemory::get_total_size()" bytes.
+   * @param user_size the size of the user data to be wrapped.
+   * @param tag       optional general purpose tag.
+   *
+   * @return user data pointer (inner pointer to supplied "base_ptr").
+   */
+  void* wrap_with_guards(void* base_ptr, size_t user_size, const void* tag = NULL) {
+    assert(base_ptr != NULL, "Attempt to wrap NULL with memory guard");
+    _base_addr = (u_char*)base_ptr;
+    get_head_guard()->build();
+    get_head_guard()->set_user_size(user_size);
+    get_tail_guard()->build();
+    set_tag(tag);
+    set_user_bytes(uninitBlockPad);
+    assert(verify_guards(), "Expected valid memory guards");
+    return get_user_ptr();
+  }
+
+  /**
+   * Verify head and tail guards.
+   *
+   * @return true if guards are intact, false would indicate a buffer overrun.
+   */
+  bool verify_guards() const {
+    if (_base_addr != NULL) {
+      return (get_head_guard()->verify() && get_tail_guard()->verify());
+    }
+    return false;
+  }
+
+  /**
+   * Set the general purpose tag.
+   *
+   * @param tag general purpose tag.
+   */
+  void set_tag(const void* tag) { get_head_guard()->set_tag(tag); }
+
+  /**
+   * Return the general purpose tag.
+   *
+   * @return the general purpose tag, defaults to NULL.
+   */
+  void* get_tag() const { return get_head_guard()->get_tag(); }
+
+  /**
+   * Return the size of the user data.
+   *
+   * @return the size of the user data.
+   */
+  size_t get_user_size() const {
+    assert(_base_addr, "Not wrapping any memory");
+    return get_head_guard()->get_user_size();
+  }
+
+  /**
+   * Return the user data pointer.
+   *
+   * @return the user data pointer.
+   */
+  u_char* get_user_ptr() const {
+    assert(_base_addr, "Not wrapping any memory");
+    return _base_addr + sizeof(GuardHeader);
+  }
+
+  /**
+   * Release the wrapped pointer for resource freeing.
+   *
+   * Pads the user data with "freeBlockPad", and dis-associates the helper.
+   *
+   * @return the original base pointer used to wrap the data.
+   */
+  void* release_for_freeing() {
+    set_user_bytes(freeBlockPad);
+    return release();
+  }
+
+  /**
+   * Dis-associate the help from the original base address.
+   *
+   * @return the original base pointer used to wrap the data.
+   */
+  void* release() {
+    void* p = (void*) _base_addr;
+    _base_addr = NULL;
+    return p;
+  }
+
+  virtual void print_on(outputStream* st) const;
+
+ protected:
+  GuardHeader*  get_head_guard() const { return (GuardHeader*) _base_addr; }
+  Guard*        get_tail_guard() const { return (Guard*) (get_user_ptr() + get_user_size()); };
+  void set_user_bytes(u_char ch) {
+    memset(get_user_ptr(), ch, get_user_size());
+  }
+
+public:
+  /**
+   * Return the total size required for wrapping the given user size.
+   *
+   * @return the total size required for wrapping the given user size.
+   */
+  static size_t get_total_size(size_t user_size) {
+    size_t total_size = sizeof(GuardHeader) + user_size + sizeof(Guard);
+    assert(total_size > user_size, "Unexpected wrap-around");
+    return total_size;
+  }
+
+  // Helper functions...
+
+  /**
+   * Wrap a copy of size "len" of "ptr".
+   *
+   * @param ptr the memory to be copied
+   * @param len the length of the copy
+   * @param tag optional general purpose tag (see GuardedMemory::get_tag())
+   *
+   * @return guarded wrapped memory pointer to the user area, or NULL if OOM.
+   */
+  static void* wrap_copy(const void* p, const size_t len, const void* tag = NULL);
+
+  /**
+   * Free wrapped copy.
+   *
+   * Frees memory copied with "wrap_copy()".
+   *
+   * @param p memory returned by "wrap_copy()".
+   *
+   * @return true if guards were verified as intact. false indicates a buffer overrun.
+   */
+  static bool free_copy(void* p);
+
+  // Testing...
+#ifndef PRODUCT
+  static void test_guarded_memory(void);
+#endif
+}; // GuardedMemory
+
+#endif // SHARE_VM_MEMORY_GUARDED_MEMORY_HPP
--- a/src/share/vm/memory/iterator.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/iterator.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -27,6 +27,7 @@
 #include "oops/oop.inline.hpp"
 
 void KlassToOopClosure::do_klass(Klass* k) {
+  assert(_oop_closure != NULL, "Not initialized?");
   k->oops_do(_oop_closure);
 }
 
@@ -34,6 +35,10 @@
   cld->oops_do(_oop_closure, &_klass_closure, _must_claim_cld);
 }
 
+void CLDToKlassAndOopClosure::do_cld(ClassLoaderData* cld) {
+  cld->oops_do(_oop_closure, _klass_closure, _must_claim_cld);
+}
+
 void ObjectToOopClosure::do_object(oop obj) {
   obj->oop_iterate(_cl);
 }
@@ -42,6 +47,20 @@
   ShouldNotCallThis();
 }
 
+void CodeBlobToOopClosure::do_nmethod(nmethod* nm) {
+  nm->oops_do(_cl);
+  if (_fix_relocations) {
+    nm->fix_oop_relocations();
+  }
+}
+
+void CodeBlobToOopClosure::do_code_blob(CodeBlob* cb) {
+  nmethod* nm = cb->as_nmethod_or_null();
+  if (nm != NULL) {
+    do_nmethod(nm);
+  }
+}
+
 MarkingCodeBlobClosure::MarkScope::MarkScope(bool activate)
   : _active(activate)
 {
@@ -54,32 +73,7 @@
 
 void MarkingCodeBlobClosure::do_code_blob(CodeBlob* cb) {
   nmethod* nm = cb->as_nmethod_or_null();
-  if (nm == NULL)  return;
-  if (!nm->test_set_oops_do_mark()) {
-    NOT_PRODUCT(if (TraceScavenge)  nm->print_on(tty, "oops_do, 1st visit\n"));
-    do_newly_marked_nmethod(nm);
-  } else {
-    NOT_PRODUCT(if (TraceScavenge)  nm->print_on(tty, "oops_do, skipped on 2nd visit\n"));
+  if (nm != NULL && !nm->test_set_oops_do_mark()) {
+    do_nmethod(nm);
   }
 }
-
-void CodeBlobToOopClosure::do_newly_marked_nmethod(nmethod* nm) {
-  nm->oops_do(_cl, /*allow_zombie=*/ false);
-}
-
-void CodeBlobToOopClosure::do_code_blob(CodeBlob* cb) {
-  if (!_do_marking) {
-    nmethod* nm = cb->as_nmethod_or_null();
-    NOT_PRODUCT(if (TraceScavenge && Verbose && nm != NULL)  nm->print_on(tty, "oops_do, unmarked visit\n"));
-    // This assert won't work, since there are lots of mini-passes
-    // (mostly in debug mode) that co-exist with marking phases.
-    //assert(!(cb->is_nmethod() && ((nmethod*)cb)->test_oops_do_mark()), "found marked nmethod during mark-free phase");
-    if (nm != NULL) {
-      nm->oops_do(_cl);
-    }
-  } else {
-    MarkingCodeBlobClosure::do_code_blob(cb);
-  }
-}
-
-
--- a/src/share/vm/memory/iterator.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/iterator.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -84,8 +84,8 @@
   //
   // Providing default implementations of the _nv functions unfortunately
   // removes the compile-time safeness, but reduces the clutter for the
-  // ExtendedOopClosures that don't need to walk the metadata. Currently,
-  // only CMS needs these.
+  // ExtendedOopClosures that don't need to walk the metadata.
+  // Currently, only CMS and G1 need these.
 
   virtual bool do_metadata() { return do_metadata_nv(); }
   bool do_metadata_v()       { return do_metadata(); }
@@ -128,17 +128,33 @@
   virtual void do_klass(Klass* k) = 0;
 };
 
+class CLDClosure : public Closure {
+ public:
+  virtual void do_cld(ClassLoaderData* cld) = 0;
+};
+
 class KlassToOopClosure : public KlassClosure {
+  friend class MetadataAwareOopClosure;
+  friend class MetadataAwareOopsInGenClosure;
+
   OopClosure* _oop_closure;
+
+  // Used when _oop_closure couldn't be set in an initialization list.
+  void initialize(OopClosure* oop_closure) {
+    assert(_oop_closure == NULL, "Should only be called once");
+    _oop_closure = oop_closure;
+  }
+
  public:
-  KlassToOopClosure(OopClosure* oop_closure) : _oop_closure(oop_closure) {}
+  KlassToOopClosure(OopClosure* oop_closure = NULL) : _oop_closure(oop_closure) {}
+
   virtual void do_klass(Klass* k);
 };
 
-class CLDToOopClosure {
-  OopClosure* _oop_closure;
+class CLDToOopClosure : public CLDClosure {
+  OopClosure*       _oop_closure;
   KlassToOopClosure _klass_closure;
-  bool _must_claim_cld;
+  bool              _must_claim_cld;
 
  public:
   CLDToOopClosure(OopClosure* oop_closure, bool must_claim_cld = true) :
@@ -149,6 +165,46 @@
   void do_cld(ClassLoaderData* cld);
 };
 
+class CLDToKlassAndOopClosure : public CLDClosure {
+  friend class SharedHeap;
+  friend class G1CollectedHeap;
+ protected:
+  OopClosure*   _oop_closure;
+  KlassClosure* _klass_closure;
+  bool          _must_claim_cld;
+ public:
+  CLDToKlassAndOopClosure(KlassClosure* klass_closure,
+                          OopClosure* oop_closure,
+                          bool must_claim_cld) :
+                              _oop_closure(oop_closure),
+                              _klass_closure(klass_closure),
+                              _must_claim_cld(must_claim_cld) {}
+  void do_cld(ClassLoaderData* cld);
+};
+
+// The base class for all concurrent marking closures,
+// that participates in class unloading.
+// It's used to proxy through the metadata to the oops defined in them.
+class MetadataAwareOopClosure: public ExtendedOopClosure {
+  KlassToOopClosure _klass_closure;
+
+ public:
+  MetadataAwareOopClosure() : ExtendedOopClosure() {
+    _klass_closure.initialize(this);
+  }
+  MetadataAwareOopClosure(ReferenceProcessor* rp) : ExtendedOopClosure(rp) {
+    _klass_closure.initialize(this);
+  }
+
+  virtual bool do_metadata()    { return do_metadata_nv(); }
+  inline  bool do_metadata_nv() { return true; }
+
+  virtual void do_klass(Klass* k);
+  void do_klass_nv(Klass* k);
+
+  virtual void do_class_loader_data(ClassLoaderData* cld);
+};
+
 // ObjectClosure is used for iterating through an object space
 
 class ObjectClosure : public Closure {
@@ -172,19 +228,6 @@
   ObjectToOopClosure(ExtendedOopClosure* cl) : _cl(cl) {}
 };
 
-// A version of ObjectClosure with "memory" (see _previous_address below)
-class UpwardsObjectClosure: public BoolObjectClosure {
-  HeapWord* _previous_address;
- public:
-  UpwardsObjectClosure() : _previous_address(NULL) { }
-  void set_previous(HeapWord* addr) { _previous_address = addr; }
-  HeapWord* previous()              { return _previous_address; }
-  // A return value of "true" can be used by the caller to decide
-  // if this object's end should *NOT* be recorded in
-  // _previous_address above.
-  virtual bool do_object_bm(oop obj, MemRegion mr) = 0;
-};
-
 // A version of ObjectClosure that is expected to be robust
 // in the face of possibly uninitialized objects.
 class ObjectClosureCareful : public ObjectClosure {
@@ -240,14 +283,26 @@
   virtual void do_code_blob(CodeBlob* cb) = 0;
 };
 
-
-class MarkingCodeBlobClosure : public CodeBlobClosure {
+// Applies an oop closure to all ref fields in code blobs
+// iterated over in an object iteration.
+class CodeBlobToOopClosure : public CodeBlobClosure {
+  OopClosure* _cl;
+  bool _fix_relocations;
+ protected:
+  void do_nmethod(nmethod* nm);
  public:
+  CodeBlobToOopClosure(OopClosure* cl, bool fix_relocations) : _cl(cl), _fix_relocations(fix_relocations) {}
+  virtual void do_code_blob(CodeBlob* cb);
+
+  const static bool FixRelocations = true;
+};
+
+class MarkingCodeBlobClosure : public CodeBlobToOopClosure {
+ public:
+  MarkingCodeBlobClosure(OopClosure* cl, bool fix_relocations) : CodeBlobToOopClosure(cl, fix_relocations) {}
   // Called for each code blob, but at most once per unique blob.
-  virtual void do_newly_marked_nmethod(nmethod* nm) = 0;
 
   virtual void do_code_blob(CodeBlob* cb);
-    // = { if (!nmethod(cb)->test_set_oops_do_mark())  do_newly_marked_nmethod(cb); }
 
   class MarkScope : public StackObj {
   protected:
@@ -260,23 +315,6 @@
   };
 };
 
-
-// Applies an oop closure to all ref fields in code blobs
-// iterated over in an object iteration.
-class CodeBlobToOopClosure: public MarkingCodeBlobClosure {
-  OopClosure* _cl;
-  bool _do_marking;
-public:
-  virtual void do_newly_marked_nmethod(nmethod* cb);
-    // = { cb->oops_do(_cl); }
-  virtual void do_code_blob(CodeBlob* cb);
-    // = { if (_do_marking)  super::do_code_blob(cb); else cb->oops_do(_cl); }
-  CodeBlobToOopClosure(OopClosure* cl, bool do_marking)
-    : _cl(cl), _do_marking(do_marking) {}
-};
-
-
-
 // MonitorClosure is used for iterating over monitors in the monitors cache
 
 class ObjectMonitor;
@@ -345,4 +383,16 @@
   }
 };
 
+
+// Helper defines for ExtendOopClosure
+
+#define if_do_metadata_checked(closure, nv_suffix)       \
+  /* Make sure the non-virtual and the virtual versions match. */     \
+  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
+      "Inconsistency in do_metadata");                                \
+  if (closure->do_metadata##nv_suffix())
+
+#define assert_should_ignore_metadata(closure, nv_suffix)                                  \
+  assert(!closure->do_metadata##nv_suffix(), "Code to handle metadata is not implemented")
+
 #endif // SHARE_VM_MEMORY_ITERATOR_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/iterator.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_ITERATOR_INLINE_HPP
+#define SHARE_VM_MEMORY_ITERATOR_INLINE_HPP
+
+#include "classfile/classLoaderData.hpp"
+#include "memory/iterator.hpp"
+#include "oops/klass.hpp"
+#include "utilities/debug.hpp"
+
+inline void MetadataAwareOopClosure::do_class_loader_data(ClassLoaderData* cld) {
+  assert(_klass_closure._oop_closure == this, "Must be");
+
+  bool claim = true;  // Must claim the class loader data before processing.
+  cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
+}
+
+inline void MetadataAwareOopClosure::do_klass_nv(Klass* k) {
+  ClassLoaderData* cld = k->class_loader_data();
+  do_class_loader_data(cld);
+}
+
+inline void MetadataAwareOopClosure::do_klass(Klass* k)       { do_klass_nv(k); }
+
+#endif // SHARE_VM_MEMORY_ITERATOR_INLINE_HPP
--- a/src/share/vm/memory/metadataFactory.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/metadataFactory.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_MEMORY_METADATAFACTORY_HPP
 #define SHARE_VM_MEMORY_METADATAFACTORY_HPP
 
+#include "classfile/classLoaderData.hpp"
 #include "utilities/array.hpp"
 #include "utilities/exceptions.hpp"
 #include "utilities/globalDefinitions.hpp"
--- a/src/share/vm/memory/sharedHeap.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/sharedHeap.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -29,6 +29,7 @@
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/sharedHeap.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/atomic.inline.hpp"
 #include "runtime/fprofiler.hpp"
 #include "runtime/java.hpp"
 #include "services/management.hpp"
@@ -39,8 +40,8 @@
 
 SharedHeap* SharedHeap::_sh;
 
-// The set of potentially parallel tasks in strong root scanning.
-enum SH_process_strong_roots_tasks {
+// The set of potentially parallel tasks in root scanning.
+enum SH_process_roots_tasks {
   SH_PS_Universe_oops_do,
   SH_PS_JNIHandles_oops_do,
   SH_PS_ObjectSynchronizer_oops_do,
@@ -58,6 +59,7 @@
   CollectedHeap(),
   _collector_policy(policy_),
   _rem_set(NULL),
+  _strong_roots_scope(NULL),
   _strong_roots_parity(0),
   _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)),
   _workers(NULL)
@@ -114,6 +116,19 @@
 static AssertNonScavengableClosure assert_is_non_scavengable_closure;
 #endif
 
+SharedHeap::StrongRootsScope* SharedHeap::active_strong_roots_scope() const {
+  return _strong_roots_scope;
+}
+void SharedHeap::register_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
+  assert(_strong_roots_scope == NULL, "Should only have one StrongRootsScope active");
+  assert(scope != NULL, "Illegal argument");
+  _strong_roots_scope = scope;
+}
+void SharedHeap::unregister_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
+  assert(_strong_roots_scope == scope, "Wrong scope unregistered");
+  _strong_roots_scope = NULL;
+}
+
 void SharedHeap::change_strong_roots_parity() {
   // Also set the new collection parity.
   assert(_strong_roots_parity >= 0 && _strong_roots_parity <= 2,
@@ -124,122 +139,173 @@
          "Not in range.");
 }
 
-SharedHeap::StrongRootsScope::StrongRootsScope(SharedHeap* outer, bool activate)
-  : MarkScope(activate)
+SharedHeap::StrongRootsScope::StrongRootsScope(SharedHeap* heap, bool activate)
+  : MarkScope(activate), _sh(heap), _n_workers_done_with_threads(0)
 {
   if (_active) {
-    outer->change_strong_roots_parity();
+    _sh->register_strong_roots_scope(this);
+    _sh->change_strong_roots_parity();
     // Zero the claimed high water mark in the StringTable
     StringTable::clear_parallel_claimed_index();
   }
 }
 
 SharedHeap::StrongRootsScope::~StrongRootsScope() {
-  // nothing particular
+  if (_active) {
+    _sh->unregister_strong_roots_scope(this);
+  }
+}
+
+Monitor* SharedHeap::StrongRootsScope::_lock = new Monitor(Mutex::leaf, "StrongRootsScope lock", false);
+
+void SharedHeap::StrongRootsScope::mark_worker_done_with_threads(uint n_workers) {
+  // The Thread work barrier is only needed by G1 Class Unloading.
+  // No need to use the barrier if this is single-threaded code.
+  if (UseG1GC && ClassUnloadingWithConcurrentMark && n_workers > 0) {
+    uint new_value = (uint)Atomic::add(1, &_n_workers_done_with_threads);
+    if (new_value == n_workers) {
+      // This thread is last. Notify the others.
+      MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+      _lock->notify_all();
+    }
+  }
 }
 
-void SharedHeap::process_strong_roots(bool activate_scope,
-                                      bool is_scavenging,
-                                      ScanningOption so,
-                                      OopClosure* roots,
-                                      CodeBlobClosure* code_roots,
-                                      KlassClosure* klass_closure) {
+void SharedHeap::StrongRootsScope::wait_until_all_workers_done_with_threads(uint n_workers) {
+  assert(UseG1GC,                          "Currently only used by G1");
+  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
+
+  // No need to use the barrier if this is single-threaded code.
+  if (n_workers > 0 && (uint)_n_workers_done_with_threads != n_workers) {
+    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+    while ((uint)_n_workers_done_with_threads != n_workers) {
+      _lock->wait(Mutex::_no_safepoint_check_flag, 0, false);
+    }
+  }
+}
+
+void SharedHeap::process_roots(bool activate_scope,
+                               ScanningOption so,
+                               OopClosure* strong_roots,
+                               OopClosure* weak_roots,
+                               CLDClosure* strong_cld_closure,
+                               CLDClosure* weak_cld_closure,
+                               CodeBlobClosure* code_roots) {
   StrongRootsScope srs(this, activate_scope);
 
-  // General strong roots.
+  // General roots.
   assert(_strong_roots_parity != 0, "must have called prologue code");
+  assert(code_roots != NULL, "code root closure should always be set");
   // _n_termination for _process_strong_tasks should be set up stream
   // in a method not running in a GC worker.  Otherwise the GC worker
   // could be trying to change the termination condition while the task
   // is executing in another GC worker.
+
+  // Iterating over the CLDG and the Threads are done early to allow G1 to
+  // first process the strong CLDs and nmethods and then, after a barrier,
+  // let the thread process the weak CLDs and nmethods.
+
+  if (!_process_strong_tasks->is_task_claimed(SH_PS_ClassLoaderDataGraph_oops_do)) {
+    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
+  }
+
+  // Some CLDs contained in the thread frames should be considered strong.
+  // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
+  CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
+  // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
+  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
+
+  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
+
+  // This is the point where this worker thread will not find more strong CLDs/nmethods.
+  // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
+  active_strong_roots_scope()->mark_worker_done_with_threads(n_par_threads());
+
   if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) {
-    Universe::oops_do(roots);
+    Universe::oops_do(strong_roots);
   }
   // Global (strong) JNI handles
   if (!_process_strong_tasks->is_task_claimed(SH_PS_JNIHandles_oops_do))
-    JNIHandles::oops_do(roots);
-
-  // All threads execute this; the individual threads are task groups.
-  CLDToOopClosure roots_from_clds(roots);
-  CLDToOopClosure* roots_from_clds_p = (is_scavenging ? NULL : &roots_from_clds);
-  if (CollectedHeap::use_parallel_gc_threads()) {
-    Threads::possibly_parallel_oops_do(roots, roots_from_clds_p, code_roots);
-  } else {
-    Threads::oops_do(roots, roots_from_clds_p, code_roots);
-  }
+    JNIHandles::oops_do(strong_roots);
 
   if (!_process_strong_tasks-> is_task_claimed(SH_PS_ObjectSynchronizer_oops_do))
-    ObjectSynchronizer::oops_do(roots);
+    ObjectSynchronizer::oops_do(strong_roots);
   if (!_process_strong_tasks->is_task_claimed(SH_PS_FlatProfiler_oops_do))
-    FlatProfiler::oops_do(roots);
+    FlatProfiler::oops_do(strong_roots);
   if (!_process_strong_tasks->is_task_claimed(SH_PS_Management_oops_do))
-    Management::oops_do(roots);
+    Management::oops_do(strong_roots);
   if (!_process_strong_tasks->is_task_claimed(SH_PS_jvmti_oops_do))
-    JvmtiExport::oops_do(roots);
+    JvmtiExport::oops_do(strong_roots);
 
   if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) {
-    if (so & SO_AllClasses) {
-      SystemDictionary::oops_do(roots);
-    } else if (so & SO_SystemClasses) {
-      SystemDictionary::always_strong_oops_do(roots);
-    } else {
-      fatal("We should always have selected either SO_AllClasses or SO_SystemClasses");
-    }
-  }
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_ClassLoaderDataGraph_oops_do)) {
-    if (so & SO_AllClasses) {
-      ClassLoaderDataGraph::oops_do(roots, klass_closure, !is_scavenging);
-    } else if (so & SO_SystemClasses) {
-      ClassLoaderDataGraph::always_strong_oops_do(roots, klass_closure, !is_scavenging);
-    }
+    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
   }
 
   // All threads execute the following. A specific chunk of buckets
   // from the StringTable are the individual tasks.
-  if (so & SO_Strings) {
+  if (weak_roots != NULL) {
     if (CollectedHeap::use_parallel_gc_threads()) {
-      StringTable::possibly_parallel_oops_do(roots);
+      StringTable::possibly_parallel_oops_do(weak_roots);
     } else {
-      StringTable::oops_do(roots);
+      StringTable::oops_do(weak_roots);
     }
   }
 
   if (!_process_strong_tasks->is_task_claimed(SH_PS_CodeCache_oops_do)) {
-    if (so & SO_CodeCache) {
+    if (so & SO_ScavengeCodeCache) {
       assert(code_roots != NULL, "must supply closure for code cache");
 
-      if (is_scavenging) {
-        // We only visit parts of the CodeCache when scavenging.
-        CodeCache::scavenge_root_nmethods_do(code_roots);
-      } else {
-        // CMSCollector uses this to do intermediate-strength collections.
-        // We scan the entire code cache, since CodeCache::do_unloading is not called.
-        CodeCache::blobs_do(code_roots);
-      }
+      // We only visit parts of the CodeCache when scavenging.
+      CodeCache::scavenge_root_nmethods_do(code_roots);
+    }
+    if (so & SO_AllCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+
+      // CMSCollector uses this to do intermediate-strength collections.
+      // We scan the entire code cache, since CodeCache::do_unloading is not called.
+      CodeCache::blobs_do(code_roots);
     }
     // Verify that the code cache contents are not subject to
     // movement by a scavenging collection.
-    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, /*do_marking=*/ false));
+    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
     DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
   }
 
   _process_strong_tasks->all_tasks_completed();
 }
 
+void SharedHeap::process_all_roots(bool activate_scope,
+                                   ScanningOption so,
+                                   OopClosure* roots,
+                                   CLDClosure* cld_closure,
+                                   CodeBlobClosure* code_closure) {
+  process_roots(activate_scope, so,
+                roots, roots,
+                cld_closure, cld_closure,
+                code_closure);
+}
+
+void SharedHeap::process_strong_roots(bool activate_scope,
+                                      ScanningOption so,
+                                      OopClosure* roots,
+                                      CLDClosure* cld_closure,
+                                      CodeBlobClosure* code_closure) {
+  process_roots(activate_scope, so,
+                roots, NULL,
+                cld_closure, NULL,
+                code_closure);
+}
+
+
 class AlwaysTrueClosure: public BoolObjectClosure {
 public:
   bool do_object_b(oop p) { return true; }
 };
 static AlwaysTrueClosure always_true;
 
-void SharedHeap::process_weak_roots(OopClosure* root_closure,
-                                    CodeBlobClosure* code_roots) {
+void SharedHeap::process_weak_roots(OopClosure* root_closure) {
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, root_closure);
-
-  CodeCache::blobs_do(code_roots);
-  StringTable::oops_do(root_closure);
 }
 
 void SharedHeap::set_barrier_set(BarrierSet* bs) {
--- a/src/share/vm/memory/sharedHeap.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/sharedHeap.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -69,14 +69,10 @@
 //    number of active GC workers.  CompactibleFreeListSpace and Space
 //    have SequentialSubTasksDone's.
 // Example of using SubTasksDone and SequentialSubTasksDone
-// G1CollectedHeap::g1_process_strong_roots() calls
-//  process_strong_roots(false, // no scoping; this is parallel code
-//                       is_scavenging, so,
-//                       &buf_scan_non_heap_roots,
-//                       &eager_scan_code_roots);
-//  which delegates to SharedHeap::process_strong_roots() and uses
+// G1CollectedHeap::g1_process_roots()
+//  to SharedHeap::process_roots() and uses
 //  SubTasksDone* _process_strong_tasks to claim tasks.
-//  process_strong_roots() calls
+//  process_roots() calls
 //      rem_set()->younger_refs_iterate()
 //  to scan the card table and which eventually calls down into
 //  CardTableModRefBS::par_non_clean_card_iterate_work().  This method
@@ -163,9 +159,6 @@
   // Iteration functions.
   void oop_iterate(ExtendedOopClosure* cl) = 0;
 
-  // Same as above, restricted to a memory region.
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl) = 0;
-
   // Iterate over all spaces in use in the heap, in an undefined order.
   virtual void space_iterate(SpaceClosure* cl) = 0;
 
@@ -185,12 +178,12 @@
   // task.  (This also means that a parallel thread may only call
   // process_strong_roots once.)
   //
-  // For calls to process_strong_roots by sequential code, the parity is
+  // For calls to process_roots by sequential code, the parity is
   // updated automatically.
   //
   // The idea is that objects representing fine-grained tasks, such as
   // threads, will contain a "parity" field.  A task will is claimed in the
-  // current "process_strong_roots" call only if its parity field is the
+  // current "process_roots" call only if its parity field is the
   // same as the "strong_roots_parity"; task claiming is accomplished by
   // updating the parity field to the strong_roots_parity with a CAS.
   //
@@ -201,27 +194,45 @@
   //   c) to never return a distinguished value (zero) with which such
   //      task-claiming variables may be initialized, to indicate "never
   //      claimed".
- private:
-  void change_strong_roots_parity();
  public:
   int strong_roots_parity() { return _strong_roots_parity; }
 
-  // Call these in sequential code around process_strong_roots.
+  // Call these in sequential code around process_roots.
   // strong_roots_prologue calls change_strong_roots_parity, if
   // parallel tasks are enabled.
   class StrongRootsScope : public MarkingCodeBlobClosure::MarkScope {
-  public:
-    StrongRootsScope(SharedHeap* outer, bool activate = true);
+    // Used to implement the Thread work barrier.
+    static Monitor* _lock;
+
+    SharedHeap*   _sh;
+    volatile jint _n_workers_done_with_threads;
+
+   public:
+    StrongRootsScope(SharedHeap* heap, bool activate = true);
     ~StrongRootsScope();
+
+    // Mark that this thread is done with the Threads work.
+    void mark_worker_done_with_threads(uint n_workers);
+    // Wait until all n_workers are done with the Threads work.
+    void wait_until_all_workers_done_with_threads(uint n_workers);
   };
   friend class StrongRootsScope;
 
+  // The current active StrongRootScope
+  StrongRootsScope* _strong_roots_scope;
+
+  StrongRootsScope* active_strong_roots_scope() const;
+
+ private:
+  void register_strong_roots_scope(StrongRootsScope* scope);
+  void unregister_strong_roots_scope(StrongRootsScope* scope);
+  void change_strong_roots_parity();
+
+ public:
   enum ScanningOption {
-    SO_None                = 0x0,
-    SO_AllClasses          = 0x1,
-    SO_SystemClasses       = 0x2,
-    SO_Strings             = 0x4,
-    SO_CodeCache           = 0x8
+    SO_None                =  0x0,
+    SO_AllCodeCache        =  0x8,
+    SO_ScavengeCodeCache   = 0x10
   };
 
   FlexibleWorkGang* workers() const { return _workers; }
@@ -229,22 +240,29 @@
   // Invoke the "do_oop" method the closure "roots" on all root locations.
   // The "so" argument determines which roots the closure is applied to:
   // "SO_None" does none;
-  // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
-  // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Strings" applies the closure to all entries in StringTable;
-  // "SO_CodeCache" applies the closure to all elements of the CodeCache.
+  // "SO_AllCodeCache" applies the closure to all elements of the CodeCache.
+  // "SO_ScavengeCodeCache" applies the closure to elements on the scavenge root list in the CodeCache.
+  void process_roots(bool activate_scope,
+                     ScanningOption so,
+                     OopClosure* strong_roots,
+                     OopClosure* weak_roots,
+                     CLDClosure* strong_cld_closure,
+                     CLDClosure* weak_cld_closure,
+                     CodeBlobClosure* code_roots);
+  void process_all_roots(bool activate_scope,
+                         ScanningOption so,
+                         OopClosure* roots,
+                         CLDClosure* cld_closure,
+                         CodeBlobClosure* code_roots);
   void process_strong_roots(bool activate_scope,
-                            bool is_scavenging,
                             ScanningOption so,
                             OopClosure* roots,
-                            CodeBlobClosure* code_roots,
-                            KlassClosure* klass_closure);
+                            CLDClosure* cld_closure,
+                            CodeBlobClosure* code_roots);
 
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table.
-  void process_weak_roots(OopClosure* root_closure,
-                          CodeBlobClosure* code_roots);
+
+  // Apply "root_closure" to the JNI weak roots..
+  void process_weak_roots(OopClosure* root_closure);
 
   // The functions below are helper functions that a subclass of
   // "SharedHeap" can use in the implementation of its virtual
@@ -257,7 +275,7 @@
   virtual void gc_epilogue(bool full) = 0;
 
   // Sets the number of parallel threads that will be doing tasks
-  // (such as process strong roots) subsequently.
+  // (such as process roots) subsequently.
   virtual void set_par_threads(uint t);
 
   int n_termination();
@@ -274,4 +292,8 @@
                              size_t capacity);
 };
 
+inline SharedHeap::ScanningOption operator|(SharedHeap::ScanningOption so0, SharedHeap::ScanningOption so1) {
+  return static_cast<SharedHeap::ScanningOption>(static_cast<int>(so0) | static_cast<int>(so1));
+}
+
 #endif // SHARE_VM_MEMORY_SHAREDHEAP_HPP
--- a/src/share/vm/memory/space.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/space.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -28,6 +28,7 @@
 #include "gc_implementation/shared/liveRange.hpp"
 #include "gc_implementation/shared/markSweep.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
 #include "memory/defNewGeneration.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -44,9 +45,6 @@
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/macros.hpp"
 
-void SpaceMemRegionOopsIterClosure::do_oop(oop* p)       { SpaceMemRegionOopsIterClosure::do_oop_work(p); }
-void SpaceMemRegionOopsIterClosure::do_oop(narrowOop* p) { SpaceMemRegionOopsIterClosure::do_oop_work(p); }
-
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
 HeapWord* DirtyCardToOopClosure::get_actual_top(HeapWord* top,
@@ -309,10 +307,6 @@
   CompactibleSpace::clear(mangle_space);
 }
 
-bool ContiguousSpace::is_in(const void* p) const {
-  return _bottom <= p && p < _top;
-}
-
 bool ContiguousSpace::is_free_block(const HeapWord* p) const {
   return p >= _top;
 }
@@ -554,115 +548,11 @@
   object_iterate(&blk2);
 }
 
-HeapWord* Space::object_iterate_careful(ObjectClosureCareful* cl) {
-  guarantee(false, "NYI");
-  return bottom();
-}
-
-HeapWord* Space::object_iterate_careful_m(MemRegion mr,
-                                          ObjectClosureCareful* cl) {
-  guarantee(false, "NYI");
-  return bottom();
-}
-
-
-void Space::object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl) {
-  assert(!mr.is_empty(), "Should be non-empty");
-  // We use MemRegion(bottom(), end()) rather than used_region() below
-  // because the two are not necessarily equal for some kinds of
-  // spaces, in particular, certain kinds of free list spaces.
-  // We could use the more complicated but more precise:
-  // MemRegion(used_region().start(), round_to(used_region().end(), CardSize))
-  // but the slight imprecision seems acceptable in the assertion check.
-  assert(MemRegion(bottom(), end()).contains(mr),
-         "Should be within used space");
-  HeapWord* prev = cl->previous();   // max address from last time
-  if (prev >= mr.end()) { // nothing to do
-    return;
-  }
-  // This assert will not work when we go from cms space to perm
-  // space, and use same closure. Easy fix deferred for later. XXX YSR
-  // assert(prev == NULL || contains(prev), "Should be within space");
-
-  bool last_was_obj_array = false;
-  HeapWord *blk_start_addr, *region_start_addr;
-  if (prev > mr.start()) {
-    region_start_addr = prev;
-    blk_start_addr    = prev;
-    // The previous invocation may have pushed "prev" beyond the
-    // last allocated block yet there may be still be blocks
-    // in this region due to a particular coalescing policy.
-    // Relax the assertion so that the case where the unallocated
-    // block is maintained and "prev" is beyond the unallocated
-    // block does not cause the assertion to fire.
-    assert((BlockOffsetArrayUseUnallocatedBlock &&
-            (!is_in(prev))) ||
-           (blk_start_addr == block_start(region_start_addr)), "invariant");
-  } else {
-    region_start_addr = mr.start();
-    blk_start_addr    = block_start(region_start_addr);
-  }
-  HeapWord* region_end_addr = mr.end();
-  MemRegion derived_mr(region_start_addr, region_end_addr);
-  while (blk_start_addr < region_end_addr) {
-    const size_t size = block_size(blk_start_addr);
-    if (block_is_obj(blk_start_addr)) {
-      last_was_obj_array = cl->do_object_bm(oop(blk_start_addr), derived_mr);
-    } else {
-      last_was_obj_array = false;
-    }
-    blk_start_addr += size;
-  }
-  if (!last_was_obj_array) {
-    assert((bottom() <= blk_start_addr) && (blk_start_addr <= end()),
-           "Should be within (closed) used space");
-    assert(blk_start_addr > prev, "Invariant");
-    cl->set_previous(blk_start_addr); // min address for next time
-  }
-}
-
 bool Space::obj_is_alive(const HeapWord* p) const {
   assert (block_is_obj(p), "The address should point to an object");
   return true;
 }
 
-void ContiguousSpace::object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl) {
-  assert(!mr.is_empty(), "Should be non-empty");
-  assert(used_region().contains(mr), "Should be within used space");
-  HeapWord* prev = cl->previous();   // max address from last time
-  if (prev >= mr.end()) { // nothing to do
-    return;
-  }
-  // See comment above (in more general method above) in case you
-  // happen to use this method.
-  assert(prev == NULL || is_in_reserved(prev), "Should be within space");
-
-  bool last_was_obj_array = false;
-  HeapWord *obj_start_addr, *region_start_addr;
-  if (prev > mr.start()) {
-    region_start_addr = prev;
-    obj_start_addr    = prev;
-    assert(obj_start_addr == block_start(region_start_addr), "invariant");
-  } else {
-    region_start_addr = mr.start();
-    obj_start_addr    = block_start(region_start_addr);
-  }
-  HeapWord* region_end_addr = mr.end();
-  MemRegion derived_mr(region_start_addr, region_end_addr);
-  while (obj_start_addr < region_end_addr) {
-    oop obj = oop(obj_start_addr);
-    const size_t size = obj->size();
-    last_was_obj_array = cl->do_object_bm(obj, derived_mr);
-    obj_start_addr += size;
-  }
-  if (!last_was_obj_array) {
-    assert((bottom() <= obj_start_addr)  && (obj_start_addr <= end()),
-           "Should be within (closed) used space");
-    assert(obj_start_addr > prev, "Invariant");
-    cl->set_previous(obj_start_addr); // min address for next time
-  }
-}
-
 #if INCLUDE_ALL_GCS
 #define ContigSpace_PAR_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)         \
                                                                             \
@@ -690,43 +580,6 @@
   }
 }
 
-void ContiguousSpace::oop_iterate(MemRegion mr, ExtendedOopClosure* blk) {
-  if (is_empty()) {
-    return;
-  }
-  MemRegion cur = MemRegion(bottom(), top());
-  mr = mr.intersection(cur);
-  if (mr.is_empty()) {
-    return;
-  }
-  if (mr.equals(cur)) {
-    oop_iterate(blk);
-    return;
-  }
-  assert(mr.end() <= top(), "just took an intersection above");
-  HeapWord* obj_addr = block_start(mr.start());
-  HeapWord* t = mr.end();
-
-  // Handle first object specially.
-  oop obj = oop(obj_addr);
-  SpaceMemRegionOopsIterClosure smr_blk(blk, mr);
-  obj_addr += obj->oop_iterate(&smr_blk);
-  while (obj_addr < t) {
-    oop obj = oop(obj_addr);
-    assert(obj->is_oop(), "expected an oop");
-    obj_addr += obj->size();
-    // If "obj_addr" is not greater than top, then the
-    // entire object "obj" is within the region.
-    if (obj_addr <= t) {
-      obj->oop_iterate(blk);
-    } else {
-      // "obj" extends beyond end of region
-      obj->oop_iterate(&smr_blk);
-      break;
-    }
-  };
-}
-
 void ContiguousSpace::object_iterate(ObjectClosure* blk) {
   if (is_empty()) return;
   WaterMark bm = bottom_mark();
@@ -832,14 +685,8 @@
 // This version requires locking.
 inline HeapWord* ContiguousSpace::allocate_impl(size_t size,
                                                 HeapWord* const end_value) {
-  // In G1 there are places where a GC worker can allocates into a
-  // region using this serial allocation code without being prone to a
-  // race with other GC workers (we ensure that no other GC worker can
-  // access the same region at the same time). So the assert below is
-  // too strong in the case of G1.
   assert(Heap_lock->owned_by_self() ||
-         (SafepointSynchronize::is_at_safepoint() &&
-                               (Thread::current()->is_VM_thread() || UseG1GC)),
+         (SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread()),
          "not locked");
   HeapWord* obj = top();
   if (pointer_delta(end_value, obj) >= size) {
@@ -873,6 +720,27 @@
   } while (true);
 }
 
+HeapWord* ContiguousSpace::allocate_aligned(size_t size) {
+  assert(Heap_lock->owned_by_self() || (SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread()), "not locked");
+  HeapWord* end_value = end();
+
+  HeapWord* obj = CollectedHeap::align_allocation_or_fail(top(), end_value, SurvivorAlignmentInBytes);
+  if (obj == NULL) {
+    return NULL;
+  }
+
+  if (pointer_delta(end_value, obj) >= size) {
+    HeapWord* new_top = obj + size;
+    set_top(new_top);
+    assert(is_ptr_aligned(obj, SurvivorAlignmentInBytes) && is_aligned(new_top),
+      "checking alignment");
+    return obj;
+  } else {
+    set_top(obj);
+    return NULL;
+  }
+}
+
 // Requires locking.
 HeapWord* ContiguousSpace::allocate(size_t size) {
   return allocate_impl(size, end());
--- a/src/share/vm/memory/space.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/space.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -65,31 +65,6 @@
 class CardTableRS;
 class DirtyCardToOopClosure;
 
-// An oop closure that is circumscribed by a filtering memory region.
-class SpaceMemRegionOopsIterClosure: public ExtendedOopClosure {
- private:
-  ExtendedOopClosure* _cl;
-  MemRegion   _mr;
- protected:
-  template <class T> void do_oop_work(T* p) {
-    if (_mr.contains(p)) {
-      _cl->do_oop(p);
-    }
-  }
- public:
-  SpaceMemRegionOopsIterClosure(ExtendedOopClosure* cl, MemRegion mr):
-    _cl(cl), _mr(mr) {}
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p);
-  virtual bool do_metadata() {
-    // _cl is of type ExtendedOopClosure instead of OopClosure, so that we can check this.
-    assert(!_cl->do_metadata(), "I've checked all call paths, this shouldn't happen.");
-    return false;
-  }
-  virtual void do_klass(Klass* k)                         { ShouldNotReachHere(); }
-  virtual void do_class_loader_data(ClassLoaderData* cld) { ShouldNotReachHere(); }
-};
-
 // A Space describes a heap area. Class Space is an abstract
 // base class.
 //
@@ -129,6 +104,12 @@
 
   void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; }
 
+  // Returns true if this object has been allocated since a
+  // generation's "save_marks" call.
+  virtual bool obj_allocated_since_save_marks(const oop obj) const {
+    return (HeapWord*)obj >= saved_mark_word();
+  }
+
   MemRegionClosure* preconsumptionDirtyCardClosure() const {
     return _preconsumptionDirtyCardClosure;
   }
@@ -136,9 +117,9 @@
     _preconsumptionDirtyCardClosure = cl;
   }
 
-  // Returns a subregion of the space containing all the objects in
+  // Returns a subregion of the space containing only the allocated objects in
   // the space.
-  virtual MemRegion used_region() const { return MemRegion(bottom(), end()); }
+  virtual MemRegion used_region() const = 0;
 
   // Returns a region that is guaranteed to contain (at least) all objects
   // allocated at the time of the last call to "save_marks".  If the space
@@ -148,7 +129,7 @@
   // saved mark.  Otherwise, the "obj_allocated_since_save_marks" method of
   // the space must distiguish between objects in the region allocated before
   // and after the call to save marks.
-  virtual MemRegion used_region_at_save_marks() const {
+  MemRegion used_region_at_save_marks() const {
     return MemRegion(bottom(), saved_mark_word());
   }
 
@@ -181,7 +162,9 @@
   // expensive operation. To prevent performance problems
   // on account of its inadvertent use in product jvm's,
   // we restrict its use to assertion checks only.
-  virtual bool is_in(const void* p) const = 0;
+  bool is_in(const void* p) const {
+    return used_region().contains(p);
+  }
 
   // Returns true iff the given reserved memory of the space contains the
   // given address.
@@ -205,11 +188,6 @@
   // applications of the closure are not included in the iteration.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to the intersection of a memory region and
-  // the space.  Fields in objects allocated by applications of the closure
-  // are not included in the iteration.
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl) = 0;
-
   // Iterate over all objects in the space, calling "cl.do_object" on
   // each.  Objects allocated by applications of the closure are not
   // included in the iteration.
@@ -218,24 +196,6 @@
   // objects whose internal references point to objects in the space.
   virtual void safe_object_iterate(ObjectClosure* blk) = 0;
 
-  // Iterate over all objects that intersect with mr, calling "cl->do_object"
-  // on each.  There is an exception to this: if this closure has already
-  // been invoked on an object, it may skip such objects in some cases.  This is
-  // Most likely to happen in an "upwards" (ascending address) iteration of
-  // MemRegions.
-  virtual void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
-
-  // Iterate over as many initialized objects in the space as possible,
-  // calling "cl.do_object_careful" on each. Return NULL if all objects
-  // in the space (at the start of the iteration) were iterated over.
-  // Return an address indicating the extent of the iteration in the
-  // event that the iteration had to return because of finding an
-  // uninitialized object in the space, or if the closure "cl"
-  // signalled early termination.
-  virtual HeapWord* object_iterate_careful(ObjectClosureCareful* cl);
-  virtual HeapWord* object_iterate_careful_m(MemRegion mr,
-                                             ObjectClosureCareful* cl);
-
   // Create and return a new dirty card to oop closure. Can be
   // overriden to return the appropriate type of closure
   // depending on the type of space in which the closure will
@@ -276,10 +236,6 @@
   // Allocation (return NULL if full).  Enforces mutual exclusion internally.
   virtual HeapWord* par_allocate(size_t word_size) = 0;
 
-  // Returns true if this object has been allocated since a
-  // generation's "save_marks" call.
-  virtual bool obj_allocated_since_save_marks(const oop obj) const = 0;
-
   // Mark-sweep-compact support: all spaces can update pointers to objects
   // moving as a part of compaction.
   virtual void adjust_pointers();
@@ -374,9 +330,9 @@
   Generation* gen;
   CompactibleSpace* space;
   HeapWord* threshold;
-  CompactPoint(Generation* _gen, CompactibleSpace* _space,
-               HeapWord* _threshold) :
-    gen(_gen), space(_space), threshold(_threshold) {}
+
+  CompactPoint(Generation* _gen) :
+    gen(_gen), space(NULL), threshold(0) {}
 };
 
 
@@ -411,7 +367,7 @@
 
   // Perform operations on the space needed after a compaction
   // has been performed.
-  virtual void reset_after_compaction() {}
+  virtual void reset_after_compaction() = 0;
 
   // Returns the next space (in the current generation) to be compacted in
   // the global compaction order.  Also is used to select the next
@@ -476,7 +432,7 @@
   HeapWord* _end_of_live;
 
   // Minimum size of a free block.
-  virtual size_t minimum_free_block_size() const = 0;
+  virtual size_t minimum_free_block_size() const { return 0; }
 
   // This the function is invoked when an allocation of an object covering
   // "start" to "end occurs crosses the threshold; returns the next
@@ -526,7 +482,7 @@
   HeapWord* top() const            { return _top;    }
   void set_top(HeapWord* value)    { _top = value; }
 
-  virtual void set_saved_mark()    { _saved_mark_word = top();    }
+  void set_saved_mark()            { _saved_mark_word = top();    }
   void reset_saved_mark()          { _saved_mark_word = bottom(); }
 
   WaterMark bottom_mark()     { return WaterMark(this, bottom()); }
@@ -561,36 +517,31 @@
   size_t used() const            { return byte_size(bottom(), top()); }
   size_t free() const            { return byte_size(top(),    end()); }
 
-  // Override from space.
-  bool is_in(const void* p) const;
-
   virtual bool is_free_block(const HeapWord* p) const;
 
   // In a contiguous space we have a more obvious bound on what parts
   // contain objects.
   MemRegion used_region() const { return MemRegion(bottom(), top()); }
 
-  MemRegion used_region_at_save_marks() const {
-    return MemRegion(bottom(), saved_mark_word());
-  }
-
   // Allocation (return NULL if full)
   virtual HeapWord* allocate(size_t word_size);
   virtual HeapWord* par_allocate(size_t word_size);
-
-  virtual bool obj_allocated_since_save_marks(const oop obj) const {
-    return (HeapWord*)obj >= saved_mark_word();
-  }
+  HeapWord* allocate_aligned(size_t word_size);
 
   // Iteration
   void oop_iterate(ExtendedOopClosure* cl);
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void object_iterate(ObjectClosure* blk);
   // For contiguous spaces this method will iterate safely over objects
   // in the space (i.e., between bottom and top) when at a safepoint.
   void safe_object_iterate(ObjectClosure* blk);
-  void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
-  // iterates on objects up to the safe limit
+
+  // Iterate over as many initialized objects in the space as possible,
+  // calling "cl.do_object_careful" on each. Return NULL if all objects
+  // in the space (at the start of the iteration) were iterated over.
+  // Return an address indicating the extent of the iteration in the
+  // event that the iteration had to return because of finding an
+  // uninitialized object in the space, or if the closure "cl"
+  // signaled early termination.
   HeapWord* object_iterate_careful(ObjectClosureCareful* cl);
   HeapWord* concurrent_iteration_safe_limit() {
     assert(_concurrent_iteration_safe_limit <= top(),
@@ -621,7 +572,6 @@
     // set new iteration safe limit
     set_concurrent_iteration_safe_limit(compaction_top());
   }
-  virtual size_t minimum_free_block_size() const { return 0; }
 
   // Override.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
--- a/src/share/vm/oops/instanceClassLoaderKlass.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/instanceClassLoaderKlass.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -28,6 +28,7 @@
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/genOopClosures.inline.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/instanceClassLoaderKlass.hpp"
@@ -44,12 +45,6 @@
 #include "oops/oop.pcgc.inline.hpp"
 #endif // INCLUDE_ALL_GCS
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 // Macro to define InstanceClassLoaderKlass::oop_oop_iterate for virtual/nonvirtual for
 // all closures.  Macros calling macros above for each oop size.
 // Since ClassLoader objects have only a pointer to the loader_data, they are not
--- a/src/share/vm/oops/instanceKlass.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/instanceKlass.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -35,6 +35,7 @@
 #include "jvmtifiles/jvmti.h"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/heapInspection.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/metadataFactory.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/fieldStreams.hpp"
@@ -288,6 +289,7 @@
   set_static_oop_field_count(0);
   set_nonstatic_field_size(0);
   set_is_marked_dependent(false);
+  set_has_unloaded_dependent(false);
   set_init_state(InstanceKlass::allocated);
   set_init_thread(NULL);
   set_reference_type(rt);
@@ -1818,6 +1820,9 @@
   return id;
 }
 
+int nmethodBucket::decrement() {
+  return Atomic::add(-1, (volatile int *)&_count);
+}
 
 //
 // Walk the list of dependent nmethods searching for nmethods which
@@ -1832,7 +1837,7 @@
     nmethod* nm = b->get_nmethod();
     // since dependencies aren't removed until an nmethod becomes a zombie,
     // the dependency list may contain nmethods which aren't alive.
-    if (nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) {
+    if (b->count() > 0 && nm->is_alive() && !nm->is_marked_for_deoptimization() && nm->check_dependency_on(changes)) {
       if (TraceDependencies) {
         ResourceMark rm;
         tty->print_cr("Marked for deoptimization");
@@ -1849,6 +1854,43 @@
   return found;
 }
 
+void InstanceKlass::clean_dependent_nmethods() {
+  assert_locked_or_safepoint(CodeCache_lock);
+
+  if (has_unloaded_dependent()) {
+    nmethodBucket* b = _dependencies;
+    nmethodBucket* last = NULL;
+    while (b != NULL) {
+      assert(b->count() >= 0, err_msg("bucket count: %d", b->count()));
+
+      nmethodBucket* next = b->next();
+
+      if (b->count() == 0) {
+        if (last == NULL) {
+          _dependencies = next;
+        } else {
+          last->set_next(next);
+        }
+        delete b;
+        // last stays the same.
+      } else {
+        last = b;
+      }
+
+      b = next;
+    }
+    set_has_unloaded_dependent(false);
+  }
+#ifdef ASSERT
+  else {
+    // Verification
+    for (nmethodBucket* b = _dependencies; b != NULL; b = b->next()) {
+      assert(b->count() >= 0, err_msg("bucket count: %d", b->count()));
+      assert(b->count() != 0, "empty buckets need to be cleaned");
+    }
+  }
+#endif
+}
 
 //
 // Add an nmethodBucket to the list of dependencies for this nmethod.
@@ -1883,13 +1925,10 @@
   nmethodBucket* last = NULL;
   while (b != NULL) {
     if (nm == b->get_nmethod()) {
-      if (b->decrement() == 0) {
-        if (last == NULL) {
-          _dependencies = b->next();
-        } else {
-          last->set_next(b->next());
-        }
-        delete b;
+      int val = b->decrement();
+      guarantee(val >= 0, err_msg("Underflow: %d", val));
+      if (val == 0) {
+        set_has_unloaded_dependent(true);
       }
       return;
     }
@@ -1928,6 +1967,10 @@
   nmethodBucket* b = _dependencies;
   while (b != NULL) {
     if (nm == b->get_nmethod()) {
+#ifdef ASSERT
+      int count = b->count();
+      assert(count >= 0, err_msg("count shouldn't be negative: %d", count));
+#endif
       return true;
     }
     b = b->next();
@@ -2132,12 +2175,6 @@
 // closure's do_metadata() method dictates whether the given closure should be
 // applied to the klass ptr in the object header.
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 #define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)        \
                                                                              \
 int InstanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \
@@ -2161,10 +2198,9 @@
 int InstanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj,                \
                                               OopClosureType* closure) {        \
   SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik); \
-  /* header */                                                                  \
-  if_do_metadata_checked(closure, nv_suffix) {                                  \
-    closure->do_klass##nv_suffix(obj->klass());                                 \
-  }                                                                             \
+                                                                                \
+  assert_should_ignore_metadata(closure, nv_suffix);                            \
+                                                                                \
   /* instance variables */                                                      \
   InstanceKlass_OOP_MAP_REVERSE_ITERATE(                                        \
     obj,                                                                        \
@@ -2233,7 +2269,7 @@
 #endif // INCLUDE_ALL_GCS
 
 void InstanceKlass::clean_implementors_list(BoolObjectClosure* is_alive) {
-  assert(is_loader_alive(is_alive), "this klass should be live");
+  assert(class_loader_data()->is_alive(is_alive), "this klass should be live");
   if (is_interface()) {
     if (ClassUnloading) {
       Klass* impl = implementor();
@@ -3042,8 +3078,7 @@
         offset          <= (juint) value->length() &&
         offset + length <= (juint) value->length()) {
       st->print(BULLET"string: ");
-      Handle h_obj(obj);
-      java_lang_String::print(h_obj, st);
+      java_lang_String::print(obj, st);
       st->cr();
       if (!WizardMode)  return;  // that is enough
     }
--- a/src/share/vm/oops/instanceKlass.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/instanceKlass.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -226,6 +226,7 @@
   // _is_marked_dependent can be set concurrently, thus cannot be part of the
   // _misc_flags.
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
+  bool            _has_unloaded_dependent;
 
   enum {
     _misc_rewritten            = 1 << 0, // methods rewritten.
@@ -473,6 +474,9 @@
   bool is_marked_dependent() const         { return _is_marked_dependent; }
   void set_is_marked_dependent(bool value) { _is_marked_dependent = value; }
 
+  bool has_unloaded_dependent() const         { return _has_unloaded_dependent; }
+  void set_has_unloaded_dependent(bool value) { _has_unloaded_dependent = value; }
+
   // initialization (virtuals from Klass)
   bool should_be_initialized() const;  // means that initialize should be called
   void initialize(TRAPS);
@@ -946,6 +950,7 @@
 
   void clean_implementors_list(BoolObjectClosure* is_alive);
   void clean_method_data(BoolObjectClosure* is_alive);
+  void clean_dependent_nmethods();
 
   // Explicit metaspace deallocation of fields
   // For RedefineClasses and class file parsing errors, we need to deallocate
@@ -1234,7 +1239,7 @@
   }
   int count()                             { return _count; }
   int increment()                         { _count += 1; return _count; }
-  int decrement()                         { _count -= 1; assert(_count >= 0, "don't underflow"); return _count; }
+  int decrement();
   nmethodBucket* next()                   { return _next; }
   void set_next(nmethodBucket* b)         { _next = b; }
   nmethod* get_nmethod()                  { return _nmethod; }
--- a/src/share/vm/oops/instanceMirrorKlass.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/instanceMirrorKlass.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -28,6 +28,7 @@
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/genOopClosures.inline.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/instanceMirrorKlass.hpp"
@@ -241,12 +242,6 @@
   return oop_size(obj);                                                               \
 
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 // Macro to define InstanceMirrorKlass::oop_oop_iterate for virtual/nonvirtual for
 // all closures.  Macros calling macros above for each oop size.
 
--- a/src/share/vm/oops/klass.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/klass.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -42,6 +42,7 @@
 #include "utilities/stack.hpp"
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/parallelScavenge/psParallelCompact.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
@@ -159,7 +160,12 @@
   _primary_supers[0] = k;
   set_super_check_offset(in_bytes(primary_supers_offset()));
 
-  set_java_mirror(NULL);
+  // The constructor is used from init_self_patching_vtbl_list,
+  // which doesn't zero out the memory before calling the constructor.
+  // Need to set the field explicitly to not hit an assert that the field
+  // should be NULL before setting it.
+  _java_mirror = NULL;
+
   set_modifier_flags(0);
   set_layout_helper(Klass::_lh_neutral_value);
   set_name(NULL);
@@ -391,7 +397,7 @@
   return mirror_alive;
 }
 
-void Klass::clean_weak_klass_links(BoolObjectClosure* is_alive) {
+void Klass::clean_weak_klass_links(BoolObjectClosure* is_alive, bool clean_alive_klasses) {
   if (!ClassUnloading) {
     return;
   }
@@ -436,7 +442,7 @@
     }
 
     // Clean the implementors list and method data.
-    if (current->oop_is_instance()) {
+    if (clean_alive_klasses && current->oop_is_instance()) {
       InstanceKlass* ik = InstanceKlass::cast(current);
       ik->clean_implementors_list(is_alive);
       ik->clean_method_data(is_alive);
@@ -448,12 +454,18 @@
   record_modified_oops();
 }
 
-void Klass::klass_update_barrier_set_pre(void* p, oop v) {
-  // This barrier used by G1, where it's used remember the old oop values,
-  // so that we don't forget any objects that were live at the snapshot at
-  // the beginning. This function is only used when we write oops into
-  // Klasses. Since the Klasses are used as roots in G1, we don't have to
-  // do anything here.
+// This barrier is used by G1 to remember the old oop values, so
+// that we don't forget any objects that were live at the snapshot at
+// the beginning. This function is only used when we write oops into Klasses.
+void Klass::klass_update_barrier_set_pre(oop* p, oop v) {
+#if INCLUDE_ALL_GCS
+  if (UseG1GC) {
+    oop obj = *p;
+    if (obj != NULL) {
+      G1SATBCardTableModRefBS::enqueue(obj);
+    }
+  }
+#endif
 }
 
 void Klass::klass_oop_store(oop* p, oop v) {
@@ -464,7 +476,7 @@
   if (always_do_update_barrier) {
     klass_oop_store((volatile oop*)p, v);
   } else {
-    klass_update_barrier_set_pre((void*)p, v);
+    klass_update_barrier_set_pre(p, v);
     *p = v;
     klass_update_barrier_set(v);
   }
@@ -474,7 +486,7 @@
   assert(!Universe::heap()->is_in_reserved((void*)p), "Should store pointer into metadata");
   assert(v == NULL || Universe::heap()->is_in_reserved((void*)v), "Should store pointer to an object");
 
-  klass_update_barrier_set_pre((void*)p, v);
+  klass_update_barrier_set_pre((oop*)p, v); // Cast away volatile.
   OrderAccess::release_store_ptr(p, v);
   klass_update_barrier_set(v);
 }
@@ -699,3 +711,21 @@
 }
 
 #endif
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+class TestKlass {
+ public:
+  static void test_oop_is_instanceClassLoader() {
+    assert(SystemDictionary::ClassLoader_klass()->oop_is_instanceClassLoader(), "assert");
+    assert(!SystemDictionary::String_klass()->oop_is_instanceClassLoader(), "assert");
+  }
+};
+
+void TestKlass_test() {
+  TestKlass::test_oop_is_instanceClassLoader();
+}
+
+#endif
--- a/src/share/vm/oops/klass.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/klass.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -499,6 +499,7 @@
   virtual bool oop_is_objArray_slow()       const { return false; }
   virtual bool oop_is_typeArray_slow()      const { return false; }
  public:
+  virtual bool oop_is_instanceClassLoader() const { return false; }
   virtual bool oop_is_instanceMirror()      const { return false; }
   virtual bool oop_is_instanceRef()         const { return false; }
 
@@ -582,7 +583,10 @@
   // The is_alive closure passed in depends on the Garbage Collector used.
   bool is_loader_alive(BoolObjectClosure* is_alive);
 
-  static void clean_weak_klass_links(BoolObjectClosure* is_alive);
+  static void clean_weak_klass_links(BoolObjectClosure* is_alive, bool clean_alive_klasses = true);
+  static void clean_subklass_tree(BoolObjectClosure* is_alive) {
+    clean_weak_klass_links(is_alive, false /* clean_alive_klasses */);
+  }
 
   // iterators
   virtual int oop_oop_iterate(oop obj, ExtendedOopClosure* blk) = 0;
@@ -689,7 +693,7 @@
  private:
   // barriers used by klass_oop_store
   void klass_update_barrier_set(oop v);
-  void klass_update_barrier_set_pre(void* p, oop v);
+  void klass_update_barrier_set_pre(oop* p, oop v);
 };
 
 #endif // SHARE_VM_OOPS_KLASS_HPP
--- a/src/share/vm/oops/objArrayKlass.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/objArrayKlass.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -29,6 +29,7 @@
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/genOopClosures.inline.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/metadataFactory.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.inline.hpp"
@@ -476,12 +477,6 @@
 }
 #endif // INCLUDE_ALL_GCS
 
-#define if_do_metadata_checked(closure, nv_suffix)                    \
-  /* Make sure the non-virtual and the virtual versions match. */     \
-  assert(closure->do_metadata##nv_suffix() == closure->do_metadata(), \
-      "Inconsistency in do_metadata");                                \
-  if (closure->do_metadata##nv_suffix())
-
 #define ObjArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)           \
                                                                                 \
 int ObjArrayKlass::oop_oop_iterate##nv_suffix(oop obj,                          \
--- a/src/share/vm/oops/oop.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/oop.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -109,12 +109,13 @@
   int size_given_klass(Klass* klass);
 
   // type test operations (inlined in oop.inline.h)
-  bool is_instance()           const;
-  bool is_instanceMirror()     const;
-  bool is_instanceRef()        const;
-  bool is_array()              const;
-  bool is_objArray()           const;
-  bool is_typeArray()          const;
+  bool is_instance()            const;
+  bool is_instanceMirror()      const;
+  bool is_instanceClassLoader() const;
+  bool is_instanceRef()         const;
+  bool is_array()               const;
+  bool is_objArray()            const;
+  bool is_typeArray()           const;
 
  private:
   // field addresses in oop
--- a/src/share/vm/oops/oop.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/oop.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -148,12 +148,13 @@
 
 inline bool oopDesc::is_a(Klass* k)        const { return klass()->is_subtype_of(k); }
 
-inline bool oopDesc::is_instance()           const { return klass()->oop_is_instance(); }
-inline bool oopDesc::is_instanceMirror()     const { return klass()->oop_is_instanceMirror(); }
-inline bool oopDesc::is_instanceRef()        const { return klass()->oop_is_instanceRef(); }
-inline bool oopDesc::is_array()              const { return klass()->oop_is_array(); }
-inline bool oopDesc::is_objArray()           const { return klass()->oop_is_objArray(); }
-inline bool oopDesc::is_typeArray()          const { return klass()->oop_is_typeArray(); }
+inline bool oopDesc::is_instance()            const { return klass()->oop_is_instance(); }
+inline bool oopDesc::is_instanceClassLoader() const { return klass()->oop_is_instanceClassLoader(); }
+inline bool oopDesc::is_instanceMirror()      const { return klass()->oop_is_instanceMirror(); }
+inline bool oopDesc::is_instanceRef()         const { return klass()->oop_is_instanceRef(); }
+inline bool oopDesc::is_array()               const { return klass()->oop_is_array(); }
+inline bool oopDesc::is_objArray()            const { return klass()->oop_is_objArray(); }
+inline bool oopDesc::is_typeArray()           const { return klass()->oop_is_typeArray(); }
 
 inline void*     oopDesc::field_base(int offset)        const { return (void*)&((char*)this)[offset]; }
 
--- a/src/share/vm/oops/oop.pcgc.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/oops/oop.pcgc.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -54,8 +54,6 @@
   klass()->oop_follow_contents(cm, this);
 }
 
-// Used by parallel old GC.
-
 inline oop oopDesc::forward_to_atomic(oop p) {
   assert(ParNewGeneration::is_legal_forward_ptr(p),
          "illegal forwarding pointer value.");
--- a/src/share/vm/opto/callGenerator.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/callGenerator.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -710,7 +710,15 @@
   Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
   iophi->set_req(2, slow_map->i_o());
   kit.set_i_o(gvn.transform(iophi));
+  // Merge memory
   kit.merge_memory(slow_map->merged_memory(), region, 2);
+  // Transform new memory Phis.
+  for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) {
+    Node* phi = mms.memory();
+    if (phi->is_Phi() && phi->in(0) == region) {
+      mms.set_memory(gvn.transform(phi));
+    }
+  }
   uint tos = kit.jvms()->stkoff() + kit.sp();
   uint limit = slow_map->req();
   for (uint i = TypeFunc::Parms; i < limit; i++) {
@@ -864,15 +872,15 @@
 }
 
 
-//------------------------PredictedIntrinsicGenerator------------------------------
-// Internal class which handles all predicted Intrinsic calls.
-class PredictedIntrinsicGenerator : public CallGenerator {
+//------------------------PredicatedIntrinsicGenerator------------------------------
+// Internal class which handles all predicated Intrinsic calls.
+class PredicatedIntrinsicGenerator : public CallGenerator {
   CallGenerator* _intrinsic;
   CallGenerator* _cg;
 
 public:
-  PredictedIntrinsicGenerator(CallGenerator* intrinsic,
-                              CallGenerator* cg)
+  PredicatedIntrinsicGenerator(CallGenerator* intrinsic,
+                               CallGenerator* cg)
     : CallGenerator(cg->method())
   {
     _intrinsic = intrinsic;
@@ -887,103 +895,182 @@
 };
 
 
-CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
-                                                      CallGenerator* cg) {
-  return new PredictedIntrinsicGenerator(intrinsic, cg);
+CallGenerator* CallGenerator::for_predicated_intrinsic(CallGenerator* intrinsic,
+                                                       CallGenerator* cg) {
+  return new PredicatedIntrinsicGenerator(intrinsic, cg);
 }
 
 
-JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+JVMState* PredicatedIntrinsicGenerator::generate(JVMState* jvms, Parse* parent_parser) {
+  // The code we want to generate here is:
+  //    if (receiver == NULL)
+  //        uncommon_Trap
+  //    if (predicate(0))
+  //        do_intrinsic(0)
+  //    else
+  //    if (predicate(1))
+  //        do_intrinsic(1)
+  //    ...
+  //    else
+  //        do_java_comp
+
   GraphKit kit(jvms);
   PhaseGVN& gvn = kit.gvn();
 
   CompileLog* log = kit.C->log();
   if (log != NULL) {
-    log->elem("predicted_intrinsic bci='%d' method='%d'",
+    log->elem("predicated_intrinsic bci='%d' method='%d'",
               jvms->bci(), log->identify(method()));
   }
 
-  Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms());
-  if (kit.failing())
-    return NULL;  // might happen because of NodeCountInliningCutoff
-
-  SafePointNode* slow_map = NULL;
-  JVMState* slow_jvms;
-  if (slow_ctl != NULL) {
-    PreserveJVMState pjvms(&kit);
-    kit.set_control(slow_ctl);
-    if (!kit.stopped()) {
-      slow_jvms = _cg->generate(kit.sync_jvms(), parent_parser);
-      if (kit.failing())
-        return NULL;  // might happen because of NodeCountInliningCutoff
-      assert(slow_jvms != NULL, "must be");
-      kit.add_exception_states_from(slow_jvms);
-      kit.set_map(slow_jvms->map());
-      if (!kit.stopped())
-        slow_map = kit.stop();
+  if (!method()->is_static()) {
+    // We need an explicit receiver null_check before checking its type in predicate.
+    // We share a map with the caller, so his JVMS gets adjusted.
+    Node* receiver = kit.null_check_receiver_before_call(method());
+    if (kit.stopped()) {
+      return kit.transfer_exceptions_into_jvms();
     }
   }
 
-  if (kit.stopped()) {
-    // Predicate is always false.
-    kit.set_jvms(slow_jvms);
+  int n_predicates = _intrinsic->predicates_count();
+  assert(n_predicates > 0, "sanity");
+
+  JVMState** result_jvms = NEW_RESOURCE_ARRAY(JVMState*, (n_predicates+1));
+
+  // Region for normal compilation code if intrinsic failed.
+  Node* slow_region = new (kit.C) RegionNode(1);
+
+  int results = 0;
+  for (int predicate = 0; (predicate < n_predicates) && !kit.stopped(); predicate++) {
+#ifdef ASSERT
+    JVMState* old_jvms = kit.jvms();
+    SafePointNode* old_map = kit.map();
+    Node* old_io  = old_map->i_o();
+    Node* old_mem = old_map->memory();
+    Node* old_exc = old_map->next_exception();
+#endif
+    Node* else_ctrl = _intrinsic->generate_predicate(kit.sync_jvms(), predicate);
+#ifdef ASSERT
+    // Assert(no_new_memory && no_new_io && no_new_exceptions) after generate_predicate.
+    assert(old_jvms == kit.jvms(), "generate_predicate should not change jvm state");
+    SafePointNode* new_map = kit.map();
+    assert(old_io  == new_map->i_o(), "generate_predicate should not change i_o");
+    assert(old_mem == new_map->memory(), "generate_predicate should not change memory");
+    assert(old_exc == new_map->next_exception(), "generate_predicate should not add exceptions");
+#endif
+    if (!kit.stopped()) {
+      PreserveJVMState pjvms(&kit);
+      // Generate intrinsic code:
+      JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser);
+      if (new_jvms == NULL) {
+        // Intrinsic failed, use normal compilation path for this predicate.
+        slow_region->add_req(kit.control());
+      } else {
+        kit.add_exception_states_from(new_jvms);
+        kit.set_jvms(new_jvms);
+        if (!kit.stopped()) {
+          result_jvms[results++] = kit.jvms();
+        }
+      }
+    }
+    if (else_ctrl == NULL) {
+      else_ctrl = kit.C->top();
+    }
+    kit.set_control(else_ctrl);
+  }
+  if (!kit.stopped()) {
+    // Final 'else' after predicates.
+    slow_region->add_req(kit.control());
+  }
+  if (slow_region->req() > 1) {
+    PreserveJVMState pjvms(&kit);
+    // Generate normal compilation code:
+    kit.set_control(gvn.transform(slow_region));
+    JVMState* new_jvms = _cg->generate(kit.sync_jvms(), parent_parser);
+    if (kit.failing())
+      return NULL;  // might happen because of NodeCountInliningCutoff
+    assert(new_jvms != NULL, "must be");
+    kit.add_exception_states_from(new_jvms);
+    kit.set_jvms(new_jvms);
+    if (!kit.stopped()) {
+      result_jvms[results++] = kit.jvms();
+    }
+  }
+
+  if (results == 0) {
+    // All paths ended in uncommon traps.
+    (void) kit.stop();
     return kit.transfer_exceptions_into_jvms();
   }
 
-  // Generate intrinsic code:
-  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms(), parent_parser);
-  if (new_jvms == NULL) {
-    // Intrinsic failed, so use slow code or make a direct call.
-    if (slow_map == NULL) {
-      CallGenerator* cg = CallGenerator::for_direct_call(method());
-      new_jvms = cg->generate(kit.sync_jvms(), parent_parser);
-    } else {
-      kit.set_jvms(slow_jvms);
-      return kit.transfer_exceptions_into_jvms();
-    }
-  }
-  kit.add_exception_states_from(new_jvms);
-  kit.set_jvms(new_jvms);
-
-  // Need to merge slow and fast?
-  if (slow_map == NULL) {
-    // The fast path is the only path remaining.
+  if (results == 1) { // Only one path
+    kit.set_jvms(result_jvms[0]);
     return kit.transfer_exceptions_into_jvms();
   }
 
-  if (kit.stopped()) {
-    // Intrinsic method threw an exception, so it's just the slow path after all.
-    kit.set_jvms(slow_jvms);
-    return kit.transfer_exceptions_into_jvms();
+  // Merge all paths.
+  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+  RegionNode* region = new (kit.C) RegionNode(results + 1);
+  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+  for (int i = 0; i < results; i++) {
+    JVMState* jvms = result_jvms[i];
+    int path = i + 1;
+    SafePointNode* map = jvms->map();
+    region->init_req(path, map->control());
+    iophi->set_req(path, map->i_o());
+    if (i == 0) {
+      kit.set_jvms(jvms);
+    } else {
+      kit.merge_memory(map->merged_memory(), region, path);
+    }
+  }
+  kit.set_control(gvn.transform(region));
+  kit.set_i_o(gvn.transform(iophi));
+  // Transform new memory Phis.
+  for (MergeMemStream mms(kit.merged_memory()); mms.next_non_empty();) {
+    Node* phi = mms.memory();
+    if (phi->is_Phi() && phi->in(0) == region) {
+      mms.set_memory(gvn.transform(phi));
+    }
   }
 
-  // Finish the diamond.
-  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
-  RegionNode* region = new (kit.C) RegionNode(3);
-  region->init_req(1, kit.control());
-  region->init_req(2, slow_map->control());
-  kit.set_control(gvn.transform(region));
-  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
-  iophi->set_req(2, slow_map->i_o());
-  kit.set_i_o(gvn.transform(iophi));
-  kit.merge_memory(slow_map->merged_memory(), region, 2);
+  // Merge debug info.
+  Node** ins = NEW_RESOURCE_ARRAY(Node*, results);
   uint tos = kit.jvms()->stkoff() + kit.sp();
-  uint limit = slow_map->req();
+  Node* map = kit.map();
+  uint limit = map->req();
   for (uint i = TypeFunc::Parms; i < limit; i++) {
     // Skip unused stack slots; fast forward to monoff();
     if (i == tos) {
       i = kit.jvms()->monoff();
       if( i >= limit ) break;
     }
-    Node* m = kit.map()->in(i);
-    Node* n = slow_map->in(i);
-    if (m != n) {
-      const Type* t = gvn.type(m)->meet_speculative(gvn.type(n));
-      Node* phi = PhiNode::make(region, m, t);
-      phi->set_req(2, n);
-      kit.map()->set_req(i, gvn.transform(phi));
+    Node* n = map->in(i);
+    ins[0] = n;
+    const Type* t = gvn.type(n);
+    bool needs_phi = false;
+    for (int j = 1; j < results; j++) {
+      JVMState* jvms = result_jvms[j];
+      Node* jmap = jvms->map();
+      Node* m = NULL;
+      if (jmap->req() > i) {
+        m = jmap->in(i);
+        if (m != n) {
+          needs_phi = true;
+          t = t->meet_speculative(gvn.type(m));
+        }
+      }
+      ins[j] = m;
+    }
+    if (needs_phi) {
+      Node* phi = PhiNode::make(region, n, t);
+      for (int j = 1; j < results; j++) {
+        phi->set_req(j + 1, ins[j]);
+      }
+      map->set_req(i, gvn.transform(phi));
     }
   }
+
   return kit.transfer_exceptions_into_jvms();
 }
 
--- a/src/share/vm/opto/callGenerator.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/callGenerator.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -63,8 +63,9 @@
   virtual bool      is_virtual() const          { return false; }
   // is_deferred: The decision whether to inline or not is deferred.
   virtual bool      is_deferred() const         { return false; }
-  // is_predicted: Uses an explicit check against a predicted type.
-  virtual bool      is_predicted() const        { return false; }
+  // is_predicated: Uses an explicit check (predicate).
+  virtual bool      is_predicated() const       { return false; }
+  virtual int       predicates_count() const    { return 0; }
   // is_trap: Does not return to the caller.  (E.g., uncommon trap.)
   virtual bool      is_trap() const             { return false; }
   // does_virtual_dispatch: Should try inlining as normal method first.
@@ -157,9 +158,9 @@
   // Registry for intrinsics:
   static CallGenerator* for_intrinsic(ciMethod* m);
   static void register_intrinsic(ciMethod* m, CallGenerator* cg);
-  static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
-                                                CallGenerator* cg);
-  virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
+  static CallGenerator* for_predicated_intrinsic(CallGenerator* intrinsic,
+                                                 CallGenerator* cg);
+  virtual Node* generate_predicate(JVMState* jvms, int predicate) { return NULL; };
 
   virtual void print_inlining_late(const char* msg) { ShouldNotReachHere(); }
 
--- a/src/share/vm/opto/doCall.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/doCall.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -115,12 +115,12 @@
   if (allow_inline && allow_intrinsics) {
     CallGenerator* cg = find_intrinsic(callee, call_does_dispatch);
     if (cg != NULL) {
-      if (cg->is_predicted()) {
+      if (cg->is_predicated()) {
         // Code without intrinsic but, hopefully, inlined.
         CallGenerator* inline_cg = this->call_generator(callee,
               vtable_index, call_does_dispatch, jvms, allow_inline, prof_factor, speculative_receiver_type, false);
         if (inline_cg != NULL) {
-          cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
+          cg = CallGenerator::for_predicated_intrinsic(cg, inline_cg);
         }
       }
 
--- a/src/share/vm/opto/escape.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/escape.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -938,7 +938,13 @@
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
-                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
+                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0)
                   ))) {
             call->dump();
             fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
--- a/src/share/vm/opto/graphKit.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/graphKit.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -2435,23 +2435,24 @@
     Node* new_slice = mms.memory2();
     if (old_slice != new_slice) {
       PhiNode* phi;
-      if (new_slice->is_Phi() && new_slice->as_Phi()->region() == region) {
-        phi = new_slice->as_Phi();
-        #ifdef ASSERT
-        if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region)
-          old_slice = old_slice->in(new_path);
-        // Caller is responsible for ensuring that any pre-existing
-        // phis are already aware of old memory.
-        int old_path = (new_path > 1) ? 1 : 2;  // choose old_path != new_path
-        assert(phi->in(old_path) == old_slice, "pre-existing phis OK");
-        #endif
-        mms.set_memory(phi);
+      if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region) {
+        if (mms.is_empty()) {
+          // clone base memory Phi's inputs for this memory slice
+          assert(old_slice == mms.base_memory(), "sanity");
+          phi = PhiNode::make(region, NULL, Type::MEMORY, mms.adr_type(C));
+          _gvn.set_type(phi, Type::MEMORY);
+          for (uint i = 1; i < phi->req(); i++) {
+            phi->init_req(i, old_slice->in(i));
+          }
+        } else {
+          phi = old_slice->as_Phi(); // Phi was generated already
+        }
       } else {
         phi = PhiNode::make(region, old_slice, Type::MEMORY, mms.adr_type(C));
         _gvn.set_type(phi, Type::MEMORY);
-        phi->set_req(new_path, new_slice);
-        mms.set_memory(_gvn.transform(phi));  // assume it is complete
       }
+      phi->set_req(new_path, new_slice);
+      mms.set_memory(phi);
     }
   }
 }
--- a/src/share/vm/opto/lcm.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/lcm.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -484,7 +484,9 @@
         iop == Op_CreateEx ||   // Create-exception must start block
         iop == Op_CheckCastPP
         ) {
-      worklist.map(i,worklist.pop());
+      // select the node n
+      // remove n from worklist and retain the order of remaining nodes
+      worklist.remove((uint)i);
       return n;
     }
 
@@ -570,7 +572,9 @@
   assert(idx >= 0, "index should be set");
   Node *n = worklist[(uint)idx];      // Get the winner
 
-  worklist.map((uint)idx, worklist.pop());     // Compress worklist
+  // select the node n
+  // remove n from worklist and retain the order of remaining nodes
+  worklist.remove((uint)idx);
   return n;
 }
 
--- a/src/share/vm/opto/library_call.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/library_call.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -46,25 +46,28 @@
  public:
  private:
   bool             _is_virtual;
-  bool             _is_predicted;
   bool             _does_virtual_dispatch;
+  int8_t           _predicates_count;  // Intrinsic is predicated by several conditions
+  int8_t           _last_predicate; // Last generated predicate
   vmIntrinsics::ID _intrinsic_id;
 
  public:
-  LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, bool does_virtual_dispatch, vmIntrinsics::ID id)
+  LibraryIntrinsic(ciMethod* m, bool is_virtual, int predicates_count, bool does_virtual_dispatch, vmIntrinsics::ID id)
     : InlineCallGenerator(m),
       _is_virtual(is_virtual),
-      _is_predicted(is_predicted),
       _does_virtual_dispatch(does_virtual_dispatch),
+      _predicates_count((int8_t)predicates_count),
+      _last_predicate((int8_t)-1),
       _intrinsic_id(id)
   {
   }
   virtual bool is_intrinsic() const { return true; }
   virtual bool is_virtual()   const { return _is_virtual; }
-  virtual bool is_predicted()   const { return _is_predicted; }
+  virtual bool is_predicated() const { return _predicates_count > 0; }
+  virtual int  predicates_count() const { return _predicates_count; }
   virtual bool does_virtual_dispatch()   const { return _does_virtual_dispatch; }
   virtual JVMState* generate(JVMState* jvms, Parse* parent_parser);
-  virtual Node* generate_predicate(JVMState* jvms);
+  virtual Node* generate_predicate(JVMState* jvms, int predicate);
   vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
 };
 
@@ -107,8 +110,8 @@
   vmIntrinsics::ID  intrinsic_id() const { return _intrinsic->intrinsic_id(); }
   ciMethod*         callee()    const    { return _intrinsic->method(); }
 
-  bool try_to_inline();
-  Node* try_to_predicate();
+  bool  try_to_inline(int predicate);
+  Node* try_to_predicate(int predicate);
 
   void push_result() {
     // Push the result onto the stack.
@@ -307,6 +310,14 @@
   Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
   Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
   Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
+  bool inline_sha_implCompress(vmIntrinsics::ID id);
+  bool inline_digestBase_implCompressMB(int predicate);
+  bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
+                                 bool long_state, address stubAddr, const char *stubName,
+                                 Node* src_start, Node* ofs, Node* limit);
+  Node* get_state_from_sha_object(Node *sha_object);
+  Node* get_state_from_sha5_object(Node *sha_object);
+  Node* inline_digestBase_implCompressMB_predicate(int predicate);
   bool inline_encodeISOArray();
   bool inline_updateCRC32();
   bool inline_updateBytesCRC32();
@@ -367,7 +378,7 @@
     }
   }
 
-  bool is_predicted = false;
+  int predicates = 0;
   bool does_virtual_dispatch = false;
 
   switch (id) {
@@ -508,7 +519,24 @@
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     if (!UseAESIntrinsics) return NULL;
     // these two require the predicated logic
-    is_predicted = true;
+    predicates = 1;
+    break;
+
+  case vmIntrinsics::_sha_implCompress:
+    if (!UseSHA1Intrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_sha2_implCompress:
+    if (!UseSHA256Intrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_sha5_implCompress:
+    if (!UseSHA512Intrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_digestBase_implCompressMB:
+    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
+    predicates = 3;
     break;
 
   case vmIntrinsics::_updateCRC32:
@@ -577,7 +605,7 @@
     if (!InlineUnsafeOps)  return NULL;
   }
 
-  return new LibraryIntrinsic(m, is_virtual, is_predicted, does_virtual_dispatch, (vmIntrinsics::ID) id);
+  return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id);
 }
 
 //----------------------register_library_intrinsics-----------------------
@@ -601,7 +629,7 @@
   const int bci    = kit.bci();
 
   // Try to inline the intrinsic.
-  if (kit.try_to_inline()) {
+  if (kit.try_to_inline(_last_predicate)) {
     if (C->print_intrinsics() || C->print_inlining()) {
       C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
@@ -634,12 +662,13 @@
   return NULL;
 }
 
-Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
+Node* LibraryIntrinsic::generate_predicate(JVMState* jvms, int predicate) {
   LibraryCallKit kit(jvms, this);
   Compile* C = kit.C;
   int nodes = C->unique();
+  _last_predicate = predicate;
 #ifndef PRODUCT
-  assert(is_predicted(), "sanity");
+  assert(is_predicated() && predicate < predicates_count(), "sanity");
   if ((C->print_intrinsics() || C->print_inlining()) && Verbose) {
     char buf[1000];
     const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
@@ -649,10 +678,10 @@
   ciMethod* callee = kit.callee();
   const int bci    = kit.bci();
 
-  Node* slow_ctl = kit.try_to_predicate();
+  Node* slow_ctl = kit.try_to_predicate(predicate);
   if (!kit.failing()) {
     if (C->print_intrinsics() || C->print_inlining()) {
-      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
+      C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual, predicate)" : "(intrinsic, predicate)");
     }
     C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
     if (C->log()) {
@@ -681,7 +710,7 @@
   return NULL;
 }
 
-bool LibraryCallKit::try_to_inline() {
+bool LibraryCallKit::try_to_inline(int predicate) {
   // Handle symbolic names for otherwise undistinguished boolean switches:
   const bool is_store       = true;
   const bool is_native_ptr  = true;
@@ -875,6 +904,14 @@
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
 
+  case vmIntrinsics::_sha_implCompress:
+  case vmIntrinsics::_sha2_implCompress:
+  case vmIntrinsics::_sha5_implCompress:
+    return inline_sha_implCompress(intrinsic_id());
+
+  case vmIntrinsics::_digestBase_implCompressMB:
+    return inline_digestBase_implCompressMB(predicate);
+
   case vmIntrinsics::_encodeISOArray:
     return inline_encodeISOArray();
 
@@ -898,7 +935,7 @@
   }
 }
 
-Node* LibraryCallKit::try_to_predicate() {
+Node* LibraryCallKit::try_to_predicate(int predicate) {
   if (!jvms()->has_method()) {
     // Root JVMState has a null method.
     assert(map()->memory()->Opcode() == Op_Parm, "");
@@ -912,6 +949,8 @@
     return inline_cipherBlockChaining_AESCrypt_predicate(false);
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     return inline_cipherBlockChaining_AESCrypt_predicate(true);
+  case vmIntrinsics::_digestBase_implCompressMB:
+    return inline_digestBase_implCompressMB_predicate(predicate);
 
   default:
     // If you get here, it may be that someone has added a new intrinsic
@@ -5866,7 +5905,12 @@
   BasicType bt = field->layout_type();
 
   // Build the resultant type of the load
-  const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+  const Type *type;
+  if (bt == T_OBJECT) {
+    type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+  } else {
+    type = Type::get_const_basic_type(bt);
+  }
 
   // Build the load.
   Node* loadedField = make_load(NULL, adr, type, bt, adr_type, MemNode::unordered, is_vol);
@@ -5996,7 +6040,7 @@
   assert(tinst != NULL, "CBC obj is null");
   assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
   ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
-  if (!klass_AESCrypt->is_loaded()) return false;
+  assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
 
   ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
   const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
@@ -6071,11 +6115,8 @@
 //    note cipher==plain is more conservative than the original java code but that's OK
 //
 Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
-  // First, check receiver for NULL since it is virtual method.
+  // The receiver was checked for NULL already.
   Node* objCBC = argument(0);
-  objCBC = null_check(objCBC);
-
-  if (stopped()) return NULL; // Always NULL
 
   // Load embeddedCipher field of CipherBlockChaining object.
   Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
@@ -6122,3 +6163,258 @@
   record_for_igvn(region);
   return _gvn.transform(region);
 }
+
+//------------------------------inline_sha_implCompress-----------------------
+//
+// Calculate SHA (i.e., SHA-1) for single-block byte[] array.
+// void com.sun.security.provider.SHA.implCompress(byte[] buf, int ofs)
+//
+// Calculate SHA2 (i.e., SHA-244 or SHA-256) for single-block byte[] array.
+// void com.sun.security.provider.SHA2.implCompress(byte[] buf, int ofs)
+//
+// Calculate SHA5 (i.e., SHA-384 or SHA-512) for single-block byte[] array.
+// void com.sun.security.provider.SHA5.implCompress(byte[] buf, int ofs)
+//
+bool LibraryCallKit::inline_sha_implCompress(vmIntrinsics::ID id) {
+  assert(callee()->signature()->size() == 2, "sha_implCompress has 2 parameters");
+
+  Node* sha_obj = argument(0);
+  Node* src     = argument(1); // type oop
+  Node* ofs     = argument(2); // type int
+
+  const Type* src_type = src->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  if (top_src  == NULL || top_src->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+  // Figure out the size and type of the elements we will be copying.
+  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_BYTE) {
+    return false;
+  }
+  // 'src_start' points to src array + offset
+  Node* src_start = array_element_address(src, ofs, src_elem);
+  Node* state = NULL;
+  address stubAddr;
+  const char *stubName;
+
+  switch(id) {
+  case vmIntrinsics::_sha_implCompress:
+    assert(UseSHA1Intrinsics, "need SHA1 instruction support");
+    state = get_state_from_sha_object(sha_obj);
+    stubAddr = StubRoutines::sha1_implCompress();
+    stubName = "sha1_implCompress";
+    break;
+  case vmIntrinsics::_sha2_implCompress:
+    assert(UseSHA256Intrinsics, "need SHA256 instruction support");
+    state = get_state_from_sha_object(sha_obj);
+    stubAddr = StubRoutines::sha256_implCompress();
+    stubName = "sha256_implCompress";
+    break;
+  case vmIntrinsics::_sha5_implCompress:
+    assert(UseSHA512Intrinsics, "need SHA512 instruction support");
+    state = get_state_from_sha5_object(sha_obj);
+    stubAddr = StubRoutines::sha512_implCompress();
+    stubName = "sha512_implCompress";
+    break;
+  default:
+    fatal_unexpected_iid(id);
+    return false;
+  }
+  if (state == NULL) return false;
+
+  // Call the stub.
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::sha_implCompress_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, state);
+
+  return true;
+}
+
+//------------------------------inline_digestBase_implCompressMB-----------------------
+//
+// Calculate SHA/SHA2/SHA5 for multi-block byte[] array.
+// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+//
+bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) {
+  assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
+         "need SHA1/SHA256/SHA512 instruction support");
+  assert((uint)predicate < 3, "sanity");
+  assert(callee()->signature()->size() == 3, "digestBase_implCompressMB has 3 parameters");
+
+  Node* digestBase_obj = argument(0); // The receiver was checked for NULL already.
+  Node* src            = argument(1); // byte[] array
+  Node* ofs            = argument(2); // type int
+  Node* limit          = argument(3); // type int
+
+  const Type* src_type = src->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  if (top_src  == NULL || top_src->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+  // Figure out the size and type of the elements we will be copying.
+  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_BYTE) {
+    return false;
+  }
+  // 'src_start' points to src array + offset
+  Node* src_start = array_element_address(src, ofs, src_elem);
+
+  const char* klass_SHA_name = NULL;
+  const char* stub_name = NULL;
+  address     stub_addr = NULL;
+  bool        long_state = false;
+
+  switch (predicate) {
+  case 0:
+    if (UseSHA1Intrinsics) {
+      klass_SHA_name = "sun/security/provider/SHA";
+      stub_name = "sha1_implCompressMB";
+      stub_addr = StubRoutines::sha1_implCompressMB();
+    }
+    break;
+  case 1:
+    if (UseSHA256Intrinsics) {
+      klass_SHA_name = "sun/security/provider/SHA2";
+      stub_name = "sha256_implCompressMB";
+      stub_addr = StubRoutines::sha256_implCompressMB();
+    }
+    break;
+  case 2:
+    if (UseSHA512Intrinsics) {
+      klass_SHA_name = "sun/security/provider/SHA5";
+      stub_name = "sha512_implCompressMB";
+      stub_addr = StubRoutines::sha512_implCompressMB();
+      long_state = true;
+    }
+    break;
+  default:
+    fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
+  }
+  if (klass_SHA_name != NULL) {
+    // get DigestBase klass to lookup for SHA klass
+    const TypeInstPtr* tinst = _gvn.type(digestBase_obj)->isa_instptr();
+    assert(tinst != NULL, "digestBase_obj is not instance???");
+    assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
+
+    ciKlass* klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
+    assert(klass_SHA->is_loaded(), "predicate checks that this class is loaded");
+    ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
+    return inline_sha_implCompressMB(digestBase_obj, instklass_SHA, long_state, stub_addr, stub_name, src_start, ofs, limit);
+  }
+  return false;
+}
+//------------------------------inline_sha_implCompressMB-----------------------
+bool LibraryCallKit::inline_sha_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass_SHA,
+                                               bool long_state, address stubAddr, const char *stubName,
+                                               Node* src_start, Node* ofs, Node* limit) {
+  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_SHA);
+  const TypeOopPtr* xtype = aklass->as_instance_type();
+  Node* sha_obj = new (C) CheckCastPPNode(control(), digestBase_obj, xtype);
+  sha_obj = _gvn.transform(sha_obj);
+
+  Node* state;
+  if (long_state) {
+    state = get_state_from_sha5_object(sha_obj);
+  } else {
+    state = get_state_from_sha_object(sha_obj);
+  }
+  if (state == NULL) return false;
+
+  // Call the stub.
+  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                 OptoRuntime::digestBase_implCompressMB_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 src_start, state, ofs, limit);
+  // return ofs (int)
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+
+  return true;
+}
+
+//------------------------------get_state_from_sha_object-----------------------
+Node * LibraryCallKit::get_state_from_sha_object(Node *sha_object) {
+  Node* sha_state = load_field_from_object(sha_object, "state", "[I", /*is_exact*/ false);
+  assert (sha_state != NULL, "wrong version of sun.security.provider.SHA/SHA2");
+  if (sha_state == NULL) return (Node *) NULL;
+
+  // now have the array, need to get the start address of the state array
+  Node* state = array_element_address(sha_state, intcon(0), T_INT);
+  return state;
+}
+
+//------------------------------get_state_from_sha5_object-----------------------
+Node * LibraryCallKit::get_state_from_sha5_object(Node *sha_object) {
+  Node* sha_state = load_field_from_object(sha_object, "state", "[J", /*is_exact*/ false);
+  assert (sha_state != NULL, "wrong version of sun.security.provider.SHA5");
+  if (sha_state == NULL) return (Node *) NULL;
+
+  // now have the array, need to get the start address of the state array
+  Node* state = array_element_address(sha_state, intcon(0), T_LONG);
+  return state;
+}
+
+//----------------------------inline_digestBase_implCompressMB_predicate----------------------------
+// Return node representing slow path of predicate check.
+// the pseudo code we want to emulate with this predicate is:
+//    if (digestBaseObj instanceof SHA/SHA2/SHA5) do_intrinsic, else do_javapath
+//
+Node* LibraryCallKit::inline_digestBase_implCompressMB_predicate(int predicate) {
+  assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics,
+         "need SHA1/SHA256/SHA512 instruction support");
+  assert((uint)predicate < 3, "sanity");
+
+  // The receiver was checked for NULL already.
+  Node* digestBaseObj = argument(0);
+
+  // get DigestBase klass for instanceOf check
+  const TypeInstPtr* tinst = _gvn.type(digestBaseObj)->isa_instptr();
+  assert(tinst != NULL, "digestBaseObj is null");
+  assert(tinst->klass()->is_loaded(), "DigestBase is not loaded");
+
+  const char* klass_SHA_name = NULL;
+  switch (predicate) {
+  case 0:
+    if (UseSHA1Intrinsics) {
+      // we want to do an instanceof comparison against the SHA class
+      klass_SHA_name = "sun/security/provider/SHA";
+    }
+    break;
+  case 1:
+    if (UseSHA256Intrinsics) {
+      // we want to do an instanceof comparison against the SHA2 class
+      klass_SHA_name = "sun/security/provider/SHA2";
+    }
+    break;
+  case 2:
+    if (UseSHA512Intrinsics) {
+      // we want to do an instanceof comparison against the SHA5 class
+      klass_SHA_name = "sun/security/provider/SHA5";
+    }
+    break;
+  default:
+    fatal(err_msg_res("unknown SHA intrinsic predicate: %d", predicate));
+  }
+
+  ciKlass* klass_SHA = NULL;
+  if (klass_SHA_name != NULL) {
+    klass_SHA = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_SHA_name));
+  }
+  if ((klass_SHA == NULL) || !klass_SHA->is_loaded()) {
+    // if none of SHA/SHA2/SHA5 is loaded, we never take the intrinsic fast path
+    Node* ctrl = control();
+    set_control(top()); // no intrinsic path
+    return ctrl;
+  }
+  ciInstanceKlass* instklass_SHA = klass_SHA->as_instance_klass();
+
+  Node* instofSHA = gen_instanceof(digestBaseObj, makecon(TypeKlassPtr::make(instklass_SHA)));
+  Node* cmp_instof = _gvn.transform(new (C) CmpINode(instofSHA, intcon(1)));
+  Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
+  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
+
+  return instof_false;  // even if it is NULL
+}
--- a/src/share/vm/opto/output.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/output.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -783,9 +783,10 @@
     // grow downwards in all implementations.
     // (If, on some machine, the interpreter's Java locals or stack
     // were to grow upwards, the embedded doubles would be word-swapped.)
-    jint   *dp = (jint*)&d;
-    array->append(new ConstantIntValue(dp[1]));
-    array->append(new ConstantIntValue(dp[0]));
+    jlong_accessor acc;
+    acc.long_value = jlong_cast(d);
+    array->append(new ConstantIntValue(acc.words[1]));
+    array->append(new ConstantIntValue(acc.words[0]));
 #endif
     break;
   }
@@ -802,9 +803,10 @@
     // grow downwards in all implementations.
     // (If, on some machine, the interpreter's Java locals or stack
     // were to grow upwards, the embedded doubles would be word-swapped.)
-    jint *dp = (jint*)&d;
-    array->append(new ConstantIntValue(dp[1]));
-    array->append(new ConstantIntValue(dp[0]));
+    jlong_accessor acc;
+    acc.long_value = d;
+    array->append(new ConstantIntValue(acc.words[1]));
+    array->append(new ConstantIntValue(acc.words[0]));
 #endif
     break;
   }
--- a/src/share/vm/opto/parse1.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/parse1.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -565,12 +565,13 @@
     set_map(entry_map);
     do_method_entry();
   }
-  if (depth() == 1) {
+
+  if (depth() == 1 && !failing()) {
     // Add check to deoptimize the nmethod if RTM state was changed
     rtm_deopt();
   }
 
-  // Check for bailouts during method entry.
+  // Check for bailouts during method entry or RTM state check setup.
   if (failing()) {
     if (log)  log->done("parse");
     C->set_default_node_notes(caller_nn);
--- a/src/share/vm/opto/runtime.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/runtime.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -898,6 +898,50 @@
   return TypeFunc::make(domain, range);
 }
 
+/*
+ * void implCompress(byte[] buf, int ofs)
+ */
+const TypeFunc* OptoRuntime::sha_implCompress_Type() {
+  // create input type (domain)
+  int num_args = 2;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL; // buf
+  fields[argp++] = TypePtr::NOTNULL; // state
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
+/*
+ * int implCompressMultiBlock(byte[] b, int ofs, int limit)
+ */
+const TypeFunc* OptoRuntime::digestBase_implCompressMB_Type() {
+  // create input type (domain)
+  int num_args = 4;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL; // buf
+  fields[argp++] = TypePtr::NOTNULL; // state
+  fields[argp++] = TypeInt::INT;     // ofs
+  fields[argp++] = TypeInt::INT;     // limit
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // returning ofs (int)
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeInt::INT; // ofs
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
+  return TypeFunc::make(domain, range);
+}
+
 //------------- Interpreter state access for on stack replacement
 const TypeFunc* OptoRuntime::osr_end_Type() {
   // create input type (domain)
--- a/src/share/vm/opto/runtime.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/runtime.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -300,6 +300,9 @@
   static const TypeFunc* aescrypt_block_Type();
   static const TypeFunc* cipherBlockChaining_aescrypt_Type();
 
+  static const TypeFunc* sha_implCompress_Type();
+  static const TypeFunc* digestBase_implCompressMB_Type();
+
   static const TypeFunc* updateBytesCRC32_Type();
 
   // leaf on stack replacement interpreter accessor types
--- a/src/share/vm/opto/superword.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/opto/superword.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1374,6 +1374,20 @@
       if (n->is_Load()) {
         Node* ctl = n->in(MemNode::Control);
         Node* mem = first->in(MemNode::Memory);
+        SWPointer p1(n->as_Mem(), this);
+        // Identify the memory dependency for the new loadVector node by
+        // walking up through memory chain.
+        // This is done to give flexibility to the new loadVector node so that
+        // it can move above independent storeVector nodes.
+        while (mem->is_StoreVector()) {
+          SWPointer p2(mem->as_Mem(), this);
+          int cmp = p1.cmp(p2);
+          if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
+            mem = mem->in(MemNode::Memory);
+          } else {
+            break; // dependent memory
+          }
+        }
         Node* adr = low_adr->in(MemNode::Address);
         const TypePtr* atyp = n->adr_type();
         vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
--- a/src/share/vm/prims/jni.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/prims/jni.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -5064,6 +5064,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #endif
+#include "memory/guardedMemory.hpp"
 #include "utilities/quickSort.hpp"
 #include "utilities/ostream.hpp"
 #if INCLUDE_VM_STRUCTS
@@ -5082,9 +5083,11 @@
 void TestMetachunk_test();
 void TestVirtualSpaceNode_test();
 void TestNewSize_test();
+void TestKlass_test();
 #if INCLUDE_ALL_GCS
 void TestOldFreeSpaceCalculation_test();
 void TestG1BiasedArray_test();
+void TestBufferingOopClosure_test();
 void TestCodeCacheRemSet_test();
 #endif
 
@@ -5102,9 +5105,11 @@
     run_unit_test(arrayOopDesc::test_max_array_length());
     run_unit_test(CollectedHeap::test_is_in());
     run_unit_test(QuickSort::test_quick_sort());
+    run_unit_test(GuardedMemory::test_guarded_memory());
     run_unit_test(AltHashing::test_alt_hash());
     run_unit_test(test_loggc_filename());
     run_unit_test(TestNewSize_test());
+    run_unit_test(TestKlass_test());
 #if INCLUDE_VM_STRUCTS
     run_unit_test(VMStructs::test());
 #endif
@@ -5112,6 +5117,7 @@
     run_unit_test(TestOldFreeSpaceCalculation_test());
     run_unit_test(TestG1BiasedArray_test());
     run_unit_test(HeapRegionRemSet::test_prt());
+    run_unit_test(TestBufferingOopClosure_test());
     run_unit_test(TestCodeCacheRemSet_test());
 #endif
     tty->print_cr("All internal VM tests passed");
--- a/src/share/vm/prims/jniCheck.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/prims/jniCheck.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "memory/guardedMemory.hpp"
 #include "oops/instanceKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/symbol.hpp"
@@ -323,6 +324,74 @@
   }
 }
 
+/*
+ * Copy and wrap array elements for bounds checking.
+ * Remember the original elements (GuardedMemory::get_tag())
+ */
+static void* check_jni_wrap_copy_array(JavaThread* thr, jarray array,
+    void* orig_elements) {
+  void* result;
+  IN_VM(
+    oop a = JNIHandles::resolve_non_null(array);
+    size_t len = arrayOop(a)->length() <<
+        TypeArrayKlass::cast(a->klass())->log2_element_size();
+    result = GuardedMemory::wrap_copy(orig_elements, len, orig_elements);
+  )
+  return result;
+}
+
+static void* check_wrapped_array(JavaThread* thr, const char* fn_name,
+    void* obj, void* carray, size_t* rsz) {
+  if (carray == NULL) {
+    tty->print_cr("%s: elements vector NULL" PTR_FORMAT, fn_name, p2i(obj));
+    NativeReportJNIFatalError(thr, "Elements vector NULL");
+  }
+  GuardedMemory guarded(carray);
+  void* orig_result = guarded.get_tag();
+  if (!guarded.verify_guards()) {
+    tty->print_cr("ReleasePrimitiveArrayCritical: release array failed bounds "
+        "check, incorrect pointer returned ? array: " PTR_FORMAT " carray: "
+        PTR_FORMAT, p2i(obj), p2i(carray));
+    guarded.print_on(tty);
+    NativeReportJNIFatalError(thr, "ReleasePrimitiveArrayCritical: "
+        "failed bounds check");
+  }
+  if (orig_result == NULL) {
+    tty->print_cr("ReleasePrimitiveArrayCritical: unrecognized elements. array: "
+        PTR_FORMAT " carray: " PTR_FORMAT, p2i(obj), p2i(carray));
+    guarded.print_on(tty);
+    NativeReportJNIFatalError(thr, "ReleasePrimitiveArrayCritical: "
+        "unrecognized elements");
+  }
+  if (rsz != NULL) {
+    *rsz = guarded.get_user_size();
+  }
+  return orig_result;
+}
+
+static void* check_wrapped_array_release(JavaThread* thr, const char* fn_name,
+    void* obj, void* carray, jint mode) {
+  size_t sz;
+  void* orig_result = check_wrapped_array(thr, fn_name, obj, carray, &sz);
+  switch (mode) {
+  case 0:
+    memcpy(orig_result, carray, sz);
+    GuardedMemory::free_copy(carray);
+    break;
+  case JNI_COMMIT:
+    memcpy(orig_result, carray, sz);
+    break;
+  case JNI_ABORT:
+    GuardedMemory::free_copy(carray);
+    break;
+  default:
+    tty->print_cr("%s: Unrecognized mode %i releasing array "
+        PTR_FORMAT " elements " PTR_FORMAT, fn_name, mode, p2i(obj), p2i(carray));
+    NativeReportJNIFatalError(thr, "Unrecognized array release mode");
+  }
+  return orig_result;
+}
+
 oop jniCheck::validate_handle(JavaThread* thr, jobject obj) {
   if (JNIHandles::is_frame_handle(thr, obj) ||
       JNIHandles::is_local_handle(thr, obj) ||
@@ -1314,7 +1383,7 @@
 JNI_END
 
 // Arbitrary (but well-known) tag
-const jint STRING_TAG = 0x47114711;
+const void* STRING_TAG = (void*)0x47114711;
 
 JNI_ENTRY_CHECKED(const jchar *,
   checked_jni_GetStringChars(JNIEnv *env,
@@ -1324,21 +1393,22 @@
     IN_VM(
       checkString(thr, str);
     )
-    jchar* newResult = NULL;
+    jchar* new_result = NULL;
     const jchar *result = UNCHECKED()->GetStringChars(env,str,isCopy);
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringChars didn't return a copy as expected");
     if (result != NULL) {
       size_t len = UNCHECKED()->GetStringLength(env,str) + 1; // + 1 for NULL termination
-      jint* tagLocation = (jint*) AllocateHeap(len * sizeof(jchar) + sizeof(jint), mtInternal);
-      *tagLocation = STRING_TAG;
-      newResult = (jchar*) (tagLocation + 1);
-      memcpy(newResult, result, len * sizeof(jchar));
+      len *= sizeof(jchar);
+      new_result = (jchar*) GuardedMemory::wrap_copy(result, len, STRING_TAG);
+      if (new_result == NULL) {
+        vm_exit_out_of_memory(len, OOM_MALLOC_ERROR, "checked_jni_GetStringChars");
+      }
       // Avoiding call to UNCHECKED()->ReleaseStringChars() since that will fire unexpected dtrace probes
       // Note that the dtrace arguments for the allocated memory will not match up with this solution.
       FreeHeap((char*)result);
     }
     functionExit(env);
-    return newResult;
+    return new_result;
 JNI_END
 
 JNI_ENTRY_CHECKED(void,
@@ -1354,11 +1424,23 @@
        UNCHECKED()->ReleaseStringChars(env,str,chars);
     }
     else {
-       jint* tagLocation = ((jint*) chars) - 1;
-       if (*tagLocation != STRING_TAG) {
-          NativeReportJNIFatalError(thr, "ReleaseStringChars called on something not allocated by GetStringChars");
-       }
-       UNCHECKED()->ReleaseStringChars(env,str,(const jchar*)tagLocation);
+      GuardedMemory guarded((void*)chars);
+      if (!guarded.verify_guards()) {
+        tty->print_cr("ReleaseStringChars: release chars failed bounds check. "
+            "string: " PTR_FORMAT " chars: " PTR_FORMAT, p2i(str), p2i(chars));
+        guarded.print_on(tty);
+        NativeReportJNIFatalError(thr, "ReleaseStringChars: "
+            "release chars failed bounds check.");
+      }
+      if (guarded.get_tag() != STRING_TAG) {
+        tty->print_cr("ReleaseStringChars: called on something not allocated "
+            "by GetStringChars. string: " PTR_FORMAT " chars: " PTR_FORMAT,
+            p2i(str), p2i(chars));
+        NativeReportJNIFatalError(thr, "ReleaseStringChars called on something "
+            "not allocated by GetStringChars");
+      }
+       UNCHECKED()->ReleaseStringChars(env, str,
+           (const jchar*) guarded.release_for_freeing());
     }
     functionExit(env);
 JNI_END
@@ -1385,7 +1467,7 @@
 JNI_END
 
 // Arbitrary (but well-known) tag - different than GetStringChars
-const jint STRING_UTF_TAG = 0x48124812;
+const void* STRING_UTF_TAG = (void*) 0x48124812;
 
 JNI_ENTRY_CHECKED(const char *,
   checked_jni_GetStringUTFChars(JNIEnv *env,
@@ -1395,21 +1477,21 @@
     IN_VM(
       checkString(thr, str);
     )
-    char* newResult = NULL;
+    char* new_result = NULL;
     const char *result = UNCHECKED()->GetStringUTFChars(env,str,isCopy);
     assert (isCopy == NULL || *isCopy == JNI_TRUE, "GetStringUTFChars didn't return a copy as expected");
     if (result != NULL) {
       size_t len = strlen(result) + 1; // + 1 for NULL termination
-      jint* tagLocation = (jint*) AllocateHeap(len + sizeof(jint), mtInternal);
-      *tagLocation = STRING_UTF_TAG;
-      newResult = (char*) (tagLocation + 1);
-      strcpy(newResult, result);
+      new_result = (char*) GuardedMemory::wrap_copy(result, len, STRING_UTF_TAG);
+      if (new_result == NULL) {
+        vm_exit_out_of_memory(len, OOM_MALLOC_ERROR, "checked_jni_GetStringUTFChars");
+      }
       // Avoiding call to UNCHECKED()->ReleaseStringUTFChars() since that will fire unexpected dtrace probes
       // Note that the dtrace arguments for the allocated memory will not match up with this solution.
       FreeHeap((char*)result, mtInternal);
     }
     functionExit(env);
-    return newResult;
+    return new_result;
 JNI_END
 
 JNI_ENTRY_CHECKED(void,
@@ -1425,11 +1507,23 @@
        UNCHECKED()->ReleaseStringUTFChars(env,str,chars);
     }
     else {
-       jint* tagLocation = ((jint*) chars) - 1;
-       if (*tagLocation != STRING_UTF_TAG) {
-          NativeReportJNIFatalError(thr, "ReleaseStringUTFChars called on something not allocated by GetStringUTFChars");
-       }
-       UNCHECKED()->ReleaseStringUTFChars(env,str,(const char*)tagLocation);
+      GuardedMemory guarded((void*)chars);
+      if (!guarded.verify_guards()) {
+        tty->print_cr("ReleaseStringUTFChars: release chars failed bounds check. "
+            "string: " PTR_FORMAT " chars: " PTR_FORMAT, p2i(str), p2i(chars));
+        guarded.print_on(tty);
+        NativeReportJNIFatalError(thr, "ReleaseStringUTFChars: "
+            "release chars failed bounds check.");
+      }
+      if (guarded.get_tag() != STRING_UTF_TAG) {
+        tty->print_cr("ReleaseStringUTFChars: called on something not "
+            "allocated by GetStringUTFChars. string: " PTR_FORMAT " chars: "
+            PTR_FORMAT, p2i(str), p2i(chars));
+        NativeReportJNIFatalError(thr, "ReleaseStringUTFChars "
+            "called on something not allocated by GetStringUTFChars");
+      }
+      UNCHECKED()->ReleaseStringUTFChars(env, str,
+          (const char*) guarded.release_for_freeing());
     }
     functionExit(env);
 JNI_END
@@ -1514,6 +1608,9 @@
     ElementType *result = UNCHECKED()->Get##Result##ArrayElements(env, \
                                                                   array, \
                                                                   isCopy); \
+    if (result != NULL) { \
+      result = (ElementType *) check_jni_wrap_copy_array(thr, array, result); \
+    } \
     functionExit(env); \
     return result; \
 JNI_END
@@ -1538,12 +1635,10 @@
       check_primitive_array_type(thr, array, ElementTag); \
       ASSERT_OOPS_ALLOWED; \
       typeArrayOop a = typeArrayOop(JNIHandles::resolve_non_null(array)); \
-      /* cannot check validity of copy, unless every request is logged by
-       * checking code.  Implementation of this check is deferred until a
-       * subsequent release.
-       */ \
     ) \
-    UNCHECKED()->Release##Result##ArrayElements(env,array,elems,mode); \
+    ElementType* orig_result = (ElementType *) check_wrapped_array_release( \
+        thr, "checked_jni_Release"#Result"ArrayElements", array, elems, mode); \
+    UNCHECKED()->Release##Result##ArrayElements(env, array, orig_result, mode); \
     functionExit(env); \
 JNI_END
 
@@ -1694,6 +1789,9 @@
       check_is_primitive_array(thr, array);
     )
     void *result = UNCHECKED()->GetPrimitiveArrayCritical(env, array, isCopy);
+    if (result != NULL) {
+      result = check_jni_wrap_copy_array(thr, array, result);
+    }
     functionExit(env);
     return result;
 JNI_END
@@ -1707,10 +1805,9 @@
     IN_VM(
       check_is_primitive_array(thr, array);
     )
-    /* The Hotspot JNI code does not use the parameters, so just check the
-     * array parameter as a minor sanity check
-     */
-    UNCHECKED()->ReleasePrimitiveArrayCritical(env, array, carray, mode);
+    // Check the element array...
+    void* orig_result = check_wrapped_array_release(thr, "ReleasePrimitiveArrayCritical", array, carray, mode);
+    UNCHECKED()->ReleasePrimitiveArrayCritical(env, array, orig_result, mode);
     functionExit(env);
 JNI_END
 
--- a/src/share/vm/prims/jvmtiTagMap.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/prims/jvmtiTagMap.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -3017,7 +3017,7 @@
 
   // If there are any non-perm roots in the code cache, visit them.
   blk.set_kind(JVMTI_HEAP_REFERENCE_OTHER);
-  CodeBlobToOopClosure look_in_blobs(&blk, false);
+  CodeBlobToOopClosure look_in_blobs(&blk, !CodeBlobToOopClosure::FixRelocations);
   CodeCache::scavenge_root_nmethods_do(&look_in_blobs);
 
   return true;
--- a/src/share/vm/prims/whitebox.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/prims/whitebox.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 
+#include "memory/metadataFactory.hpp"
 #include "memory/universe.hpp"
 #include "oops/oop.inline.hpp"
 
@@ -36,6 +37,7 @@
 #include "runtime/arguments.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/os.hpp"
+#include "utilities/array.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/exceptions.hpp"
@@ -725,6 +727,35 @@
 WB_END
 
 
+int WhiteBox::array_bytes_to_length(size_t bytes) {
+  return Array<u1>::bytes_to_length(bytes);
+}
+
+WB_ENTRY(jlong, WB_AllocateMetaspace(JNIEnv* env, jobject wb, jobject class_loader, jlong size))
+  if (size < 0) {
+    THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
+        err_msg("WB_AllocateMetaspace: size is negative: " JLONG_FORMAT, size));
+  }
+
+  oop class_loader_oop = JNIHandles::resolve(class_loader);
+  ClassLoaderData* cld = class_loader_oop != NULL
+      ? java_lang_ClassLoader::loader_data(class_loader_oop)
+      : ClassLoaderData::the_null_class_loader_data();
+
+  void* metadata = MetadataFactory::new_writeable_array<u1>(cld, WhiteBox::array_bytes_to_length((size_t)size), thread);
+
+  return (jlong)(uintptr_t)metadata;
+WB_END
+
+WB_ENTRY(void, WB_FreeMetaspace(JNIEnv* env, jobject wb, jobject class_loader, jlong addr, jlong size))
+  oop class_loader_oop = JNIHandles::resolve(class_loader);
+  ClassLoaderData* cld = class_loader_oop != NULL
+      ? java_lang_ClassLoader::loader_data(class_loader_oop)
+      : ClassLoaderData::the_null_class_loader_data();
+
+  MetadataFactory::free_array(cld, (Array<u1>*)(uintptr_t)addr);
+WB_END
+
 //Some convenience methods to deal with objects from java
 int WhiteBox::offset_for_field(const char* field_name, oop object,
     Symbol* signature_symbol) {
@@ -855,6 +886,10 @@
   {CC"isInStringTable",    CC"(Ljava/lang/String;)Z", (void*)&WB_IsInStringTable  },
   {CC"fullGC",   CC"()V",                             (void*)&WB_FullGC },
   {CC"readReservedMemory", CC"()V",                   (void*)&WB_ReadReservedMemory },
+  {CC"allocateMetaspace",
+     CC"(Ljava/lang/ClassLoader;J)J",                 (void*)&WB_AllocateMetaspace },
+  {CC"freeMetaspace",
+     CC"(Ljava/lang/ClassLoader;JJ)V",                (void*)&WB_FreeMetaspace },
   {CC"getCPUFeatures",     CC"()Ljava/lang/String;",  (void*)&WB_GetCPUFeatures     },
   {CC"getNMethod",         CC"(Ljava/lang/reflect/Executable;Z)[Ljava/lang/Object;",
                                                       (void*)&WB_GetNMethod         },
--- a/src/share/vm/prims/whitebox.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/prims/whitebox.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -62,6 +62,8 @@
     Symbol* signature_symbol);
   static const char* lookup_jstring(const char* field_name, oop object);
   static bool lookup_bool(const char* field_name, oop object);
+
+  static int array_bytes_to_length(size_t bytes);
 };
 
 
--- a/src/share/vm/runtime/arguments.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1398,6 +1398,22 @@
                 (int)ObjectAlignmentInBytes, os::vm_page_size());
     return false;
   }
+  if(SurvivorAlignmentInBytes == 0) {
+    SurvivorAlignmentInBytes = ObjectAlignmentInBytes;
+  } else {
+    if (!is_power_of_2(SurvivorAlignmentInBytes)) {
+      jio_fprintf(defaultStream::error_stream(),
+            "error: SurvivorAlignmentInBytes=%d must be power of 2\n",
+            (int)SurvivorAlignmentInBytes);
+      return false;
+    }
+    if (SurvivorAlignmentInBytes < ObjectAlignmentInBytes) {
+      jio_fprintf(defaultStream::error_stream(),
+          "error: SurvivorAlignmentInBytes=%d must be greater than ObjectAlignmentInBytes=%d \n",
+          (int)SurvivorAlignmentInBytes, (int)ObjectAlignmentInBytes);
+      return false;
+    }
+  }
   return true;
 }
 
@@ -1505,8 +1521,10 @@
     heap_alignment = G1CollectedHeap::conservative_max_heap_alignment();
   }
 #endif // INCLUDE_ALL_GCS
-  _conservative_max_heap_alignment = MAX3(heap_alignment, os::max_page_size(),
-    CollectorPolicy::compute_heap_alignment());
+  _conservative_max_heap_alignment = MAX4(heap_alignment,
+                                          (size_t)os::vm_allocation_granularity(),
+                                          os::max_page_size(),
+                                          CollectorPolicy::compute_heap_alignment());
 }
 
 void Arguments::set_ergonomics_flags() {
--- a/src/share/vm/runtime/frame.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/frame.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -900,7 +900,7 @@
 }
 
 
-void frame::oops_interpreted_do(OopClosure* f, CLDToOopClosure* cld_f,
+void frame::oops_interpreted_do(OopClosure* f, CLDClosure* cld_f,
     const RegisterMap* map, bool query_oop_map_cache) {
   assert(is_interpreted_frame(), "Not an interpreted frame");
   assert(map != NULL, "map must be set");
@@ -1140,7 +1140,7 @@
 }
 
 
-void frame::oops_do_internal(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache) {
+void frame::oops_do_internal(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache) {
 #ifndef PRODUCT
   // simulate GC crash here to dump java thread in error report
   if (CrashGCForDumpingJavaThread) {
--- a/src/share/vm/runtime/frame.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/frame.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -419,19 +419,19 @@
 
   // Oops-do's
   void oops_compiled_arguments_do(Symbol* signature, bool has_receiver, bool has_appendix, const RegisterMap* reg_map, OopClosure* f);
-  void oops_interpreted_do(OopClosure* f, CLDToOopClosure* cld_f, const RegisterMap* map, bool query_oop_map_cache = true);
+  void oops_interpreted_do(OopClosure* f, CLDClosure* cld_f, const RegisterMap* map, bool query_oop_map_cache = true);
 
  private:
   void oops_interpreted_arguments_do(Symbol* signature, bool has_receiver, OopClosure* f);
 
   // Iteration of oops
-  void oops_do_internal(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache);
+  void oops_do_internal(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map, bool use_interpreter_oop_map_cache);
   void oops_entry_do(OopClosure* f, const RegisterMap* map);
   void oops_code_blob_do(OopClosure* f, CodeBlobClosure* cf, const RegisterMap* map);
   int adjust_offset(Method* method, int index); // helper for above fn
  public:
   // Memory management
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map) { oops_do_internal(f, cld_f, cf, map, true); }
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf, RegisterMap* map) { oops_do_internal(f, cld_f, cf, map, true); }
   void nmethods_do(CodeBlobClosure* cf);
 
   // RedefineClasses support for finding live interpreted methods on the stack
--- a/src/share/vm/runtime/globals.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/globals.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -597,6 +597,9 @@
   product(bool, UseAES, false,                                              \
           "Control whether AES instructions can be used on x86/x64")        \
                                                                             \
+  product(bool, UseSHA, false,                                              \
+          "Control whether SHA instructions can be used on SPARC")          \
+                                                                            \
   product(uintx, LargePageSizeInBytes, 0,                                   \
           "Large page size (0 to let VM choose the page size)")             \
                                                                             \
@@ -703,6 +706,15 @@
   product(bool, UseAESIntrinsics, false,                                    \
           "Use intrinsics for AES versions of crypto")                      \
                                                                             \
+  product(bool, UseSHA1Intrinsics, false,                                   \
+          "Use intrinsics for SHA-1 crypto hash function")                  \
+                                                                            \
+  product(bool, UseSHA256Intrinsics, false,                                 \
+          "Use intrinsics for SHA-224 and SHA-256 crypto hash functions")   \
+                                                                            \
+  product(bool, UseSHA512Intrinsics, false,                                 \
+          "Use intrinsics for SHA-384 and SHA-512 crypto hash functions")   \
+                                                                            \
   product(bool, UseCRC32Intrinsics, false,                                  \
           "use intrinsics for java.util.zip.CRC32")                         \
                                                                             \
@@ -1064,6 +1076,9 @@
   product(bool, ClassUnloading, true,                                       \
           "Do unloading of classes")                                        \
                                                                             \
+  product(bool, ClassUnloadingWithConcurrentMark, true,                     \
+          "Do unloading of classes with a concurrent marking cycle")        \
+                                                                            \
   develop(bool, DisableStartThread, false,                                  \
           "Disable starting of additional Java threads "                    \
           "(for debugging only)")                                           \
@@ -3870,6 +3885,9 @@
   product(bool, PrintGCCause, true,                                         \
           "Include GC cause in GC logging")                                 \
                                                                             \
+  experimental(intx, SurvivorAlignmentInBytes, 0,                           \
+           "Default survivor space alignment in bytes")                     \
+                                                                            \
   product(bool , AllowNonVirtualCalls, false,                               \
           "Obey the ACC_SUPER flag and allow invokenonvirtual calls")       \
                                                                             \
--- a/src/share/vm/runtime/os.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/os.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -32,6 +32,9 @@
 #include "gc_implementation/shared/vmGCOperations.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/allocation.inline.hpp"
+#ifdef ASSERT
+#include "memory/guardedMemory.hpp"
+#endif
 #include "oops/oop.inline.hpp"
 #include "prims/jvm.h"
 #include "prims/jvm_misc.hpp"
@@ -524,118 +527,16 @@
 
 
 
-#ifdef ASSERT
-#define space_before             (MallocCushion + sizeof(double))
-#define space_after              MallocCushion
-#define size_addr_from_base(p)   (size_t*)(p + space_before - sizeof(size_t))
-#define size_addr_from_obj(p)    ((size_t*)p - 1)
-// MallocCushion: size of extra cushion allocated around objects with +UseMallocOnly
-// NB: cannot be debug variable, because these aren't set from the command line until
-// *after* the first few allocs already happened
-#define MallocCushion            16
-#else
-#define space_before             0
-#define space_after              0
-#define size_addr_from_base(p)   should not use w/o ASSERT
-#define size_addr_from_obj(p)    should not use w/o ASSERT
-#define MallocCushion            0
-#endif
 #define paranoid                 0  /* only set to 1 if you suspect checking code has bug */
 
 #ifdef ASSERT
-inline size_t get_size(void* obj) {
-  size_t size = *size_addr_from_obj(obj);
-  if (size < 0) {
-    fatal(err_msg("free: size field of object #" PTR_FORMAT " was overwritten ("
-                  SIZE_FORMAT ")", obj, size));
-  }
-  return size;
-}
-
-u_char* find_cushion_backwards(u_char* start) {
-  u_char* p = start;
-  while (p[ 0] != badResourceValue || p[-1] != badResourceValue ||
-         p[-2] != badResourceValue || p[-3] != badResourceValue) p--;
-  // ok, we have four consecutive marker bytes; find start
-  u_char* q = p - 4;
-  while (*q == badResourceValue) q--;
-  return q + 1;
-}
-
-u_char* find_cushion_forwards(u_char* start) {
-  u_char* p = start;
-  while (p[0] != badResourceValue || p[1] != badResourceValue ||
-         p[2] != badResourceValue || p[3] != badResourceValue) p++;
-  // ok, we have four consecutive marker bytes; find end of cushion
-  u_char* q = p + 4;
-  while (*q == badResourceValue) q++;
-  return q - MallocCushion;
-}
-
-void print_neighbor_blocks(void* ptr) {
-  // find block allocated before ptr (not entirely crash-proof)
-  if (MallocCushion < 4) {
-    tty->print_cr("### cannot find previous block (MallocCushion < 4)");
-    return;
-  }
-  u_char* start_of_this_block = (u_char*)ptr - space_before;
-  u_char* end_of_prev_block_data = start_of_this_block - space_after -1;
-  // look for cushion in front of prev. block
-  u_char* start_of_prev_block = find_cushion_backwards(end_of_prev_block_data);
-  ptrdiff_t size = *size_addr_from_base(start_of_prev_block);
-  u_char* obj = start_of_prev_block + space_before;
-  if (size <= 0 ) {
-    // start is bad; mayhave been confused by OS data inbetween objects
-    // search one more backwards
-    start_of_prev_block = find_cushion_backwards(start_of_prev_block);
-    size = *size_addr_from_base(start_of_prev_block);
-    obj = start_of_prev_block + space_before;
-  }
-
-  if (start_of_prev_block + space_before + size + space_after == start_of_this_block) {
-    tty->print_cr("### previous object: " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", obj, size);
-  } else {
-    tty->print_cr("### previous object (not sure if correct): " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", obj, size);
-  }
-
-  // now find successor block
-  u_char* start_of_next_block = (u_char*)ptr + *size_addr_from_obj(ptr) + space_after;
-  start_of_next_block = find_cushion_forwards(start_of_next_block);
-  u_char* next_obj = start_of_next_block + space_before;
-  ptrdiff_t next_size = *size_addr_from_base(start_of_next_block);
-  if (start_of_next_block[0] == badResourceValue &&
-      start_of_next_block[1] == badResourceValue &&
-      start_of_next_block[2] == badResourceValue &&
-      start_of_next_block[3] == badResourceValue) {
-    tty->print_cr("### next object: " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", next_obj, next_size);
-  } else {
-    tty->print_cr("### next object (not sure if correct): " PTR_FORMAT " (" SSIZE_FORMAT " bytes)", next_obj, next_size);
-  }
-}
-
-
-void report_heap_error(void* memblock, void* bad, const char* where) {
-  tty->print_cr("## nof_mallocs = " UINT64_FORMAT ", nof_frees = " UINT64_FORMAT, os::num_mallocs, os::num_frees);
-  tty->print_cr("## memory stomp: byte at " PTR_FORMAT " %s object " PTR_FORMAT, bad, where, memblock);
-  print_neighbor_blocks(memblock);
-  fatal("memory stomping error");
-}
-
-void verify_block(void* memblock) {
-  size_t size = get_size(memblock);
-  if (MallocCushion) {
-    u_char* ptr = (u_char*)memblock - space_before;
-    for (int i = 0; i < MallocCushion; i++) {
-      if (ptr[i] != badResourceValue) {
-        report_heap_error(memblock, ptr+i, "in front of");
-      }
-    }
-    u_char* end = (u_char*)memblock + size + space_after;
-    for (int j = -MallocCushion; j < 0; j++) {
-      if (end[j] != badResourceValue) {
-        report_heap_error(memblock, end+j, "after");
-      }
-    }
+static void verify_memory(void* ptr) {
+  GuardedMemory guarded(ptr);
+  if (!guarded.verify_guards()) {
+    tty->print_cr("## nof_mallocs = " UINT64_FORMAT ", nof_frees = " UINT64_FORMAT, os::num_mallocs, os::num_frees);
+    tty->print_cr("## memory stomp:");
+    guarded.print_on(tty);
+    fatal("memory stomping error");
   }
 }
 #endif
@@ -686,16 +587,18 @@
     size = 1;
   }
 
-  const size_t alloc_size = size + space_before + space_after;
-
+#ifndef ASSERT
+  const size_t alloc_size = size;
+#else
+  const size_t alloc_size = GuardedMemory::get_total_size(size);
   if (size > alloc_size) { // Check for rollover.
     return NULL;
   }
+#endif
 
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
 
   u_char* ptr;
-
   if (MallocMaxTestWords > 0) {
     ptr = testMalloc(alloc_size);
   } else {
@@ -703,28 +606,26 @@
   }
 
 #ifdef ASSERT
-  if (ptr == NULL) return NULL;
-  if (MallocCushion) {
-    for (u_char* p = ptr; p < ptr + MallocCushion; p++) *p = (u_char)badResourceValue;
-    u_char* end = ptr + space_before + size;
-    for (u_char* pq = ptr+MallocCushion; pq < end; pq++) *pq = (u_char)uninitBlockPad;
-    for (u_char* q = end; q < end + MallocCushion; q++) *q = (u_char)badResourceValue;
+  if (ptr == NULL) {
+    return NULL;
   }
-  // put size just before data
-  *size_addr_from_base(ptr) = size;
+  // Wrap memory with guard
+  GuardedMemory guarded(ptr, size);
+  ptr = guarded.get_user_ptr();
 #endif
-  u_char* memblock = ptr + space_before;
-  if ((intptr_t)memblock == (intptr_t)MallocCatchPtr) {
-    tty->print_cr("os::malloc caught, " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock);
+  if ((intptr_t)ptr == (intptr_t)MallocCatchPtr) {
+    tty->print_cr("os::malloc caught, " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, ptr);
     breakpoint();
   }
-  debug_only(if (paranoid) verify_block(memblock));
-  if (PrintMalloc && tty != NULL) tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, memblock);
+  debug_only(if (paranoid) verify_memory(ptr));
+  if (PrintMalloc && tty != NULL) {
+    tty->print_cr("os::malloc " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, ptr);
+  }
 
-  // we do not track MallocCushion memory
-    MemTracker::record_malloc((address)memblock, size, memflags, caller == 0 ? CALLER_PC : caller);
+  // we do not track guard memory
+  MemTracker::record_malloc((address)ptr, size, memflags, caller == 0 ? CALLER_PC : caller);
 
-  return memblock;
+  return ptr;
 }
 
 
@@ -743,27 +644,32 @@
   return ptr;
 #else
   if (memblock == NULL) {
-    return malloc(size, memflags, (caller == 0 ? CALLER_PC : caller));
+    return os::malloc(size, memflags, (caller == 0 ? CALLER_PC : caller));
   }
   if ((intptr_t)memblock == (intptr_t)MallocCatchPtr) {
     tty->print_cr("os::realloc caught " PTR_FORMAT, memblock);
     breakpoint();
   }
-  verify_block(memblock);
+  verify_memory(memblock);
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
-  if (size == 0) return NULL;
+  if (size == 0) {
+    return NULL;
+  }
   // always move the block
-  void* ptr = malloc(size, memflags, caller == 0 ? CALLER_PC : caller);
-  if (PrintMalloc) tty->print_cr("os::remalloc " SIZE_FORMAT " bytes, " PTR_FORMAT " --> " PTR_FORMAT, size, memblock, ptr);
+  void* ptr = os::malloc(size, memflags, caller == 0 ? CALLER_PC : caller);
+  if (PrintMalloc) {
+    tty->print_cr("os::remalloc " SIZE_FORMAT " bytes, " PTR_FORMAT " --> " PTR_FORMAT, size, memblock, ptr);
+  }
   // Copy to new memory if malloc didn't fail
   if ( ptr != NULL ) {
-    memcpy(ptr, memblock, MIN2(size, get_size(memblock)));
-    if (paranoid) verify_block(ptr);
+    GuardedMemory guarded(memblock);
+    memcpy(ptr, memblock, MIN2(size, guarded.get_user_size()));
+    if (paranoid) verify_memory(ptr);
     if ((intptr_t)ptr == (intptr_t)MallocCatchPtr) {
       tty->print_cr("os::realloc caught, " SIZE_FORMAT " bytes --> " PTR_FORMAT, size, ptr);
       breakpoint();
     }
-    free(memblock);
+    os::free(memblock);
   }
   return ptr;
 #endif
@@ -771,6 +677,7 @@
 
 
 void  os::free(void *memblock, MEMFLAGS memflags) {
+  address trackp = (address) memblock;
   NOT_PRODUCT(inc_stat_counter(&num_frees, 1));
 #ifdef ASSERT
   if (memblock == NULL) return;
@@ -778,34 +685,20 @@
     if (tty != NULL) tty->print_cr("os::free caught " PTR_FORMAT, memblock);
     breakpoint();
   }
-  verify_block(memblock);
+  verify_memory(memblock);
   NOT_PRODUCT(if (MallocVerifyInterval > 0) check_heap());
-  // Added by detlefs.
-  if (MallocCushion) {
-    u_char* ptr = (u_char*)memblock - space_before;
-    for (u_char* p = ptr; p < ptr + MallocCushion; p++) {
-      guarantee(*p == badResourceValue,
-                "Thing freed should be malloc result.");
-      *p = (u_char)freeBlockPad;
-    }
-    size_t size = get_size(memblock);
-    inc_stat_counter(&free_bytes, size);
-    u_char* end = ptr + space_before + size;
-    for (u_char* q = end; q < end + MallocCushion; q++) {
-      guarantee(*q == badResourceValue,
-                "Thing freed should be malloc result.");
-      *q = (u_char)freeBlockPad;
-    }
-    if (PrintMalloc && tty != NULL)
+
+  GuardedMemory guarded(memblock);
+  size_t size = guarded.get_user_size();
+  inc_stat_counter(&free_bytes, size);
+  memblock = guarded.release_for_freeing();
+  if (PrintMalloc && tty != NULL) {
       fprintf(stderr, "os::free " SIZE_FORMAT " bytes --> " PTR_FORMAT "\n", size, (uintptr_t)memblock);
-  } else if (PrintMalloc && tty != NULL) {
-    // tty->print_cr("os::free %p", memblock);
-    fprintf(stderr, "os::free " PTR_FORMAT "\n", (uintptr_t)memblock);
   }
 #endif
-  MemTracker::record_free((address)memblock, memflags);
+  MemTracker::record_free(trackp, memflags);
 
-  ::free((char*)memblock - space_before);
+  ::free(memblock);
 }
 
 void os::init_random(long initval) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/sharedRuntimeMath.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_SHAREDRUNTIMEMATH_HPP
+#define SHARE_VM_RUNTIME_SHAREDRUNTIMEMATH_HPP
+
+#include <math.h>
+
+// Used to access the lower/higher 32 bits of a double
+typedef union {
+    double d;
+    struct {
+#ifdef VM_LITTLE_ENDIAN
+      int lo;
+      int hi;
+#else
+      int hi;
+      int lo;
+#endif
+    } split;
+} DoubleIntConv;
+
+static inline int high(double d) {
+  DoubleIntConv x;
+  x.d = d;
+  return x.split.hi;
+}
+
+static inline int low(double d) {
+  DoubleIntConv x;
+  x.d = d;
+  return x.split.lo;
+}
+
+static inline void set_high(double* d, int high) {
+  DoubleIntConv conv;
+  conv.d = *d;
+  conv.split.hi = high;
+  *d = conv.d;
+}
+
+static inline void set_low(double* d, int low) {
+  DoubleIntConv conv;
+  conv.d = *d;
+  conv.split.lo = low;
+  *d = conv.d;
+}
+
+static double copysignA(double x, double y) {
+  DoubleIntConv convX;
+  convX.d = x;
+  convX.split.hi = (convX.split.hi & 0x7fffffff) | (high(y) & 0x80000000);
+  return convX.d;
+}
+
+/*
+ * ====================================================
+ * Copyright (c) 1998 Oracle and/or its affiliates. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+/*
+ * scalbn (double x, int n)
+ * scalbn(x,n) returns x* 2**n  computed by  exponent
+ * manipulation rather than by actually performing an
+ * exponentiation or a multiplication.
+ */
+
+static const double
+two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
+twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
+hugeX  = 1.0e+300,
+tiny   = 1.0e-300;
+
+static double scalbnA(double x, int n) {
+  int  k,hx,lx;
+  hx = high(x);
+  lx = low(x);
+  k = (hx&0x7ff00000)>>20;              /* extract exponent */
+  if (k==0) {                           /* 0 or subnormal x */
+    if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
+    x *= two54;
+    hx = high(x);
+    k = ((hx&0x7ff00000)>>20) - 54;
+    if (n< -50000) return tiny*x;       /*underflow*/
+  }
+  if (k==0x7ff) return x+x;             /* NaN or Inf */
+  k = k+n;
+  if (k > 0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
+  if (k > 0) {                          /* normal result */
+    set_high(&x, (hx&0x800fffff)|(k<<20));
+    return x;
+  }
+  if (k <= -54) {
+    if (n > 50000)      /* in case integer overflow in n+k */
+      return hugeX*copysignA(hugeX,x);  /*overflow*/
+    else return tiny*copysignA(tiny,x); /*underflow*/
+  }
+  k += 54;                              /* subnormal result */
+  set_high(&x, (hx&0x800fffff)|(k<<20));
+  return x*twom54;
+}
+
+#endif // SHARE_VM_RUNTIME_SHAREDRUNTIMEMATH_HPP
--- a/src/share/vm/runtime/sharedRuntimeTrans.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/sharedRuntimeTrans.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -40,81 +40,11 @@
 // generated; can not figure out how to turn down optimization for one
 // file in the IDE on Windows
 #ifdef WIN32
+# pragma warning( disable: 4748 ) // /GS can not protect parameters and local variables from local buffer overrun because optimizations are disabled in function
 # pragma optimize ( "", off )
 #endif
 
-#include <math.h>
-
-// VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
-// [jk] this is not 100% correct because the float word order may different
-// from the byte order (e.g. on ARM)
-#ifdef VM_LITTLE_ENDIAN
-# define __HI(x) *(1+(int*)&x)
-# define __LO(x) *(int*)&x
-#else
-# define __HI(x) *(int*)&x
-# define __LO(x) *(1+(int*)&x)
-#endif
-
-#if !defined(AIX)
-double copysign(double x, double y) {
-  __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
-  return x;
-}
-#endif
-
-/*
- * ====================================================
- * Copyright (c) 1998 Oracle and/or its affiliates. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-/*
- * scalbn (double x, int n)
- * scalbn(x,n) returns x* 2**n  computed by  exponent
- * manipulation rather than by actually performing an
- * exponentiation or a multiplication.
- */
-
-static const double
-two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-  twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-  hugeX   = 1.0e+300,
-  tiny   = 1.0e-300;
-
-#if !defined(AIX)
-double scalbn (double x, int n) {
-  int  k,hx,lx;
-  hx = __HI(x);
-  lx = __LO(x);
-  k = (hx&0x7ff00000)>>20;              /* extract exponent */
-  if (k==0) {                           /* 0 or subnormal x */
-    if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
-    x *= two54;
-    hx = __HI(x);
-    k = ((hx&0x7ff00000)>>20) - 54;
-    if (n< -50000) return tiny*x;       /*underflow*/
-  }
-  if (k==0x7ff) return x+x;             /* NaN or Inf */
-  k = k+n;
-  if (k >  0x7fe) return hugeX*copysign(hugeX,x); /* overflow  */
-  if (k > 0)                            /* normal result */
-    {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
-  if (k <= -54) {
-    if (n > 50000)      /* in case integer overflow in n+k */
-      return hugeX*copysign(hugeX,x);   /*overflow*/
-    else return tiny*copysign(tiny,x);  /*underflow*/
-  }
-  k += 54;                              /* subnormal result */
-  __HI(x) = (hx&0x800fffff)|(k<<20);
-  return x*twom54;
-}
-#endif
+#include "runtime/sharedRuntimeMath.hpp"
 
 /* __ieee754_log(x)
  * Return the logrithm of x
@@ -185,8 +115,8 @@
   int k,hx,i,j;
   unsigned lx;
 
-  hx = __HI(x);               /* high word of x */
-  lx = __LO(x);               /* low  word of x */
+  hx = high(x);               /* high word of x */
+  lx = low(x);                /* low  word of x */
 
   k=0;
   if (hx < 0x00100000) {                   /* x < 2**-1022  */
@@ -194,13 +124,13 @@
       return -two54/zero;             /* log(+-0)=-inf */
     if (hx<0) return (x-x)/zero;   /* log(-#) = NaN */
     k -= 54; x *= two54; /* subnormal number, scale up x */
-    hx = __HI(x);             /* high word of x */
+    hx = high(x);             /* high word of x */
   }
   if (hx >= 0x7ff00000) return x+x;
   k += (hx>>20)-1023;
   hx &= 0x000fffff;
   i = (hx+0x95f64)&0x100000;
-  __HI(x) = hx|(i^0x3ff00000);        /* normalize x or x/2 */
+  set_high(&x, hx|(i^0x3ff00000)); /* normalize x or x/2 */
   k += (i>>20);
   f = x-1.0;
   if((0x000fffff&(2+hx))<3) {  /* |f| < 2**-20 */
@@ -279,8 +209,8 @@
   int i,k,hx;
   unsigned lx;
 
-  hx = __HI(x);       /* high word of x */
-  lx = __LO(x);       /* low word of x */
+  hx = high(x);       /* high word of x */
+  lx = low(x);        /* low word of x */
 
   k=0;
   if (hx < 0x00100000) {                  /* x < 2**-1022  */
@@ -288,14 +218,14 @@
       return -two54/zero;             /* log(+-0)=-inf */
     if (hx<0) return (x-x)/zero;        /* log(-#) = NaN */
     k -= 54; x *= two54; /* subnormal number, scale up x */
-    hx = __HI(x);                /* high word of x */
+    hx = high(x);                /* high word of x */
   }
   if (hx >= 0x7ff00000) return x+x;
   k += (hx>>20)-1023;
   i  = ((unsigned)k&0x80000000)>>31;
   hx = (hx&0x000fffff)|((0x3ff-i)<<20);
   y  = (double)(k+i);
-  __HI(x) = hx;
+  set_high(&x, hx);
   z  = y*log10_2lo + ivln10*__ieee754_log(x);
   return  z+y*log10_2hi;
 }
@@ -390,14 +320,14 @@
   int k=0,xsb;
   unsigned hx;
 
-  hx  = __HI(x);        /* high word of x */
+  hx  = high(x);                /* high word of x */
   xsb = (hx>>31)&1;             /* sign bit of x */
   hx &= 0x7fffffff;             /* high word of |x| */
 
   /* filter out non-finite argument */
   if(hx >= 0x40862E42) {                        /* if |x|>=709.78... */
     if(hx>=0x7ff00000) {
-      if(((hx&0xfffff)|__LO(x))!=0)
+      if(((hx&0xfffff)|low(x))!=0)
         return x+x;             /* NaN */
       else return (xsb==0)? x:0.0;      /* exp(+-inf)={inf,0} */
     }
@@ -428,10 +358,10 @@
   if(k==0)      return one-((x*c)/(c-2.0)-x);
   else          y = one-((lo-(x*c)/(2.0-c))-hi);
   if(k >= -1021) {
-    __HI(y) += (k<<20); /* add k to y's exponent */
+    set_high(&y, high(y) + (k<<20)); /* add k to y's exponent */
     return y;
   } else {
-    __HI(y) += ((k+1000)<<20);/* add k to y's exponent */
+    set_high(&y, high(y) + ((k+1000)<<20)); /* add k to y's exponent */
     return y*twom1000;
   }
 }
@@ -518,8 +448,8 @@
   unsigned lx,ly;
 
   i0 = ((*(int*)&one)>>29)^1; i1=1-i0;
-  hx = __HI(x); lx = __LO(x);
-  hy = __HI(y); ly = __LO(y);
+  hx = high(x); lx = low(x);
+  hy = high(y); ly = low(y);
   ix = hx&0x7fffffff;  iy = hy&0x7fffffff;
 
   /* y==zero: x**0 = 1 */
@@ -619,14 +549,14 @@
     u = ivln2_h*t;      /* ivln2_h has 21 sig. bits */
     v = t*ivln2_l-w*ivln2;
     t1 = u+v;
-    __LO(t1) = 0;
+    set_low(&t1, 0);
     t2 = v-(t1-u);
   } else {
     double ss,s2,s_h,s_l,t_h,t_l;
     n = 0;
     /* take care subnormal number */
     if(ix<0x00100000)
-      {ax *= two53; n -= 53; ix = __HI(ax); }
+      {ax *= two53; n -= 53; ix = high(ax); }
     n  += ((ix)>>20)-0x3ff;
     j  = ix&0x000fffff;
     /* determine interval */
@@ -634,17 +564,17 @@
     if(j<=0x3988E) k=0;         /* |x|<sqrt(3/2) */
     else if(j<0xBB67A) k=1;     /* |x|<sqrt(3)   */
     else {k=0;n+=1;ix -= 0x00100000;}
-    __HI(ax) = ix;
+    set_high(&ax, ix);
 
     /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
     u = ax-bp[k];               /* bp[0]=1.0, bp[1]=1.5 */
     v = one/(ax+bp[k]);
     ss = u*v;
     s_h = ss;
-    __LO(s_h) = 0;
+    set_low(&s_h, 0);
     /* t_h=ax+bp[k] High */
     t_h = zeroX;
-    __HI(t_h)=((ix>>1)|0x20000000)+0x00080000+(k<<18);
+    set_high(&t_h, ((ix>>1)|0x20000000)+0x00080000+(k<<18));
     t_l = ax - (t_h-bp[k]);
     s_l = v*((u-s_h*t_h)-s_h*t_l);
     /* compute log(ax) */
@@ -653,32 +583,32 @@
     r += s_l*(s_h+ss);
     s2  = s_h*s_h;
     t_h = 3.0+s2+r;
-    __LO(t_h) = 0;
+    set_low(&t_h, 0);
     t_l = r-((t_h-3.0)-s2);
     /* u+v = ss*(1+...) */
     u = s_h*t_h;
     v = s_l*t_h+t_l*ss;
     /* 2/(3log2)*(ss+...) */
     p_h = u+v;
-    __LO(p_h) = 0;
+    set_low(&p_h, 0);
     p_l = v-(p_h-u);
     z_h = cp_h*p_h;             /* cp_h+cp_l = 2/(3*log2) */
     z_l = cp_l*p_h+p_l*cp+dp_l[k];
     /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
     t = (double)n;
     t1 = (((z_h+z_l)+dp_h[k])+t);
-    __LO(t1) = 0;
+    set_low(&t1, 0);
     t2 = z_l-(((t1-t)-dp_h[k])-z_h);
   }
 
   /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
   y1  = y;
-  __LO(y1) = 0;
+  set_low(&y1, 0);
   p_l = (y-y1)*t1+y*t2;
   p_h = y1*t1;
   z = p_l+p_h;
-  j = __HI(z);
-  i = __LO(z);
+  j = high(z);
+  i = low(z);
   if (j>=0x40900000) {                          /* z >= 1024 */
     if(((j-0x40900000)|i)!=0)                   /* if z > 1024 */
       return s*hugeX*hugeX;                     /* overflow */
@@ -702,13 +632,13 @@
     n = j+(0x00100000>>(k+1));
     k = ((n&0x7fffffff)>>20)-0x3ff;     /* new k for n */
     t = zeroX;
-    __HI(t) = (n&~(0x000fffff>>k));
+    set_high(&t, (n&~(0x000fffff>>k)));
     n = ((n&0x000fffff)|0x00100000)>>(20-k);
     if(j<0) n = -n;
     p_h -= t;
   }
   t = p_l+p_h;
-  __LO(t) = 0;
+  set_low(&t, 0);
   u = t*lg2_h;
   v = (p_l-(t-p_h))*lg2+t*lg2_l;
   z = u+v;
@@ -717,10 +647,10 @@
   t1  = z - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
   r  = (z*t1)/(t1-two)-(w+z*w);
   z  = one-(r-z);
-  j  = __HI(z);
+  j  = high(z);
   j += (n<<20);
-  if((j>>20)<=0) z = scalbn(z,n);       /* subnormal output */
-  else __HI(z) += (n<<20);
+  if((j>>20)<=0) z = scalbnA(z,n);       /* subnormal output */
+  else set_high(&z, high(z) + (n<<20));
   return s*z;
 }
 
--- a/src/share/vm/runtime/sharedRuntimeTrig.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/sharedRuntimeTrig.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -63,63 +63,7 @@
 #define SAFEBUF
 #endif
 
-#include <math.h>
-
-// VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
-// [jk] this is not 100% correct because the float word order may different
-// from the byte order (e.g. on ARM)
-#ifdef VM_LITTLE_ENDIAN
-# define __HI(x) *(1+(int*)&x)
-# define __LO(x) *(int*)&x
-#else
-# define __HI(x) *(int*)&x
-# define __LO(x) *(1+(int*)&x)
-#endif
-
-static double copysignA(double x, double y) {
-  __HI(x) = (__HI(x)&0x7fffffff)|(__HI(y)&0x80000000);
-  return x;
-}
-
-/*
- * scalbn (double x, int n)
- * scalbn(x,n) returns x* 2**n  computed by  exponent
- * manipulation rather than by actually performing an
- * exponentiation or a multiplication.
- */
-
-static const double
-two54   =  1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-twom54  =  5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-hugeX  = 1.0e+300,
-tiny   = 1.0e-300;
-
-static double scalbnA (double x, int n) {
-  int  k,hx,lx;
-  hx = __HI(x);
-  lx = __LO(x);
-  k = (hx&0x7ff00000)>>20;              /* extract exponent */
-  if (k==0) {                           /* 0 or subnormal x */
-    if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
-    x *= two54;
-    hx = __HI(x);
-    k = ((hx&0x7ff00000)>>20) - 54;
-    if (n< -50000) return tiny*x;       /*underflow*/
-  }
-  if (k==0x7ff) return x+x;             /* NaN or Inf */
-  k = k+n;
-  if (k >  0x7fe) return hugeX*copysignA(hugeX,x); /* overflow  */
-  if (k > 0)                            /* normal result */
-    {__HI(x) = (hx&0x800fffff)|(k<<20); return x;}
-  if (k <= -54) {
-    if (n > 50000)      /* in case integer overflow in n+k */
-      return hugeX*copysignA(hugeX,x);  /*overflow*/
-    else return tiny*copysignA(tiny,x); /*underflow*/
-  }
-  k += 54;                              /* subnormal result */
-  __HI(x) = (hx&0x800fffff)|(k<<20);
-  return x*twom54;
-}
+#include "runtime/sharedRuntimeMath.hpp"
 
 /*
  * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
@@ -603,7 +547,7 @@
 {
         double z,r,v;
         int ix;
-        ix = __HI(x)&0x7fffffff;        /* high word of x */
+        ix = high(x)&0x7fffffff;                /* high word of x */
         if(ix<0x3e400000)                       /* |x| < 2**-27 */
            {if((int)x==0) return x;}            /* generate inexact */
         z       =  x*x;
@@ -658,9 +602,9 @@
 
 static double __kernel_cos(double x, double y)
 {
-  double a,h,z,r,qx;
+  double a,h,z,r,qx=0;
   int ix;
-  ix = __HI(x)&0x7fffffff;      /* ix = |x|'s high word*/
+  ix = high(x)&0x7fffffff;              /* ix = |x|'s high word*/
   if(ix<0x3e400000) {                   /* if x < 2**27 */
     if(((int)x)==0) return one;         /* generate inexact */
   }
@@ -672,8 +616,8 @@
     if(ix > 0x3fe90000) {               /* x > 0.78125 */
       qx = 0.28125;
     } else {
-      __HI(qx) = ix-0x00200000; /* x/4 */
-      __LO(qx) = 0;
+      set_high(&qx, ix-0x00200000); /* x/4 */
+      set_low(&qx, 0);
     }
     h = 0.5*z-qx;
     a = one-qx;
@@ -738,11 +682,11 @@
 {
   double z,r,v,w,s;
   int ix,hx;
-  hx = __HI(x);   /* high word of x */
+  hx = high(x);           /* high word of x */
   ix = hx&0x7fffffff;     /* high word of |x| */
   if(ix<0x3e300000) {                     /* x < 2**-28 */
     if((int)x==0) {                       /* generate inexact */
-      if (((ix | __LO(x)) | (iy + 1)) == 0)
+      if (((ix | low(x)) | (iy + 1)) == 0)
         return one / fabsd(x);
       else {
         if (iy == 1)
@@ -751,10 +695,10 @@
           double a, t;
 
           z = w = x + y;
-          __LO(z) = 0;
+          set_low(&z, 0);
           v = y - (z - x);
           t = a = -one / w;
-          __LO(t) = 0;
+          set_low(&t, 0);
           s = one + t * z;
           return t + a * (s + t * v);
         }
@@ -789,10 +733,10 @@
     /*  compute -1.0/(x+r) accurately */
     double a,t;
     z  = w;
-    __LO(z) = 0;
+    set_low(&z, 0);
     v  = r-(z - x);     /* z+v = r+x */
     t = a  = -1.0/w;    /* a = -1.0/w */
-    __LO(t) = 0;
+    set_low(&t, 0);
     s  = 1.0+t*z;
     return t+a*(s+t*v);
   }
@@ -841,7 +785,7 @@
   int n, ix;
 
   /* High word of x. */
-  ix = __HI(x);
+  ix = high(x);
 
   /* |x| ~< pi/4 */
   ix &= 0x7fffffff;
@@ -899,7 +843,7 @@
   int n, ix;
 
   /* High word of x. */
-  ix = __HI(x);
+  ix = high(x);
 
   /* |x| ~< pi/4 */
   ix &= 0x7fffffff;
@@ -956,7 +900,7 @@
   int n, ix;
 
   /* High word of x. */
-  ix = __HI(x);
+  ix = high(x);
 
   /* |x| ~< pi/4 */
   ix &= 0x7fffffff;
--- a/src/share/vm/runtime/stubRoutines.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/stubRoutines.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -125,6 +125,13 @@
 address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
 address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
 
+address StubRoutines::_sha1_implCompress     = NULL;
+address StubRoutines::_sha1_implCompressMB   = NULL;
+address StubRoutines::_sha256_implCompress   = NULL;
+address StubRoutines::_sha256_implCompressMB = NULL;
+address StubRoutines::_sha512_implCompress   = NULL;
+address StubRoutines::_sha512_implCompressMB = NULL;
+
 address StubRoutines::_updateBytesCRC32 = NULL;
 address StubRoutines::_crc_table_adr = NULL;
 
--- a/src/share/vm/runtime/stubRoutines.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/stubRoutines.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -207,6 +207,13 @@
   static address _cipherBlockChaining_encryptAESCrypt;
   static address _cipherBlockChaining_decryptAESCrypt;
 
+  static address _sha1_implCompress;
+  static address _sha1_implCompressMB;
+  static address _sha256_implCompress;
+  static address _sha256_implCompressMB;
+  static address _sha512_implCompress;
+  static address _sha512_implCompressMB;
+
   static address _updateBytesCRC32;
   static address _crc_table_adr;
 
@@ -356,6 +363,13 @@
   static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
   static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
 
+  static address sha1_implCompress()     { return _sha1_implCompress; }
+  static address sha1_implCompressMB()   { return _sha1_implCompressMB; }
+  static address sha256_implCompress()   { return _sha256_implCompress; }
+  static address sha256_implCompressMB() { return _sha256_implCompressMB; }
+  static address sha512_implCompress()   { return _sha512_implCompress; }
+  static address sha512_implCompressMB() { return _sha512_implCompressMB; }
+
   static address updateBytesCRC32()    { return _updateBytesCRC32; }
   static address crc_table_addr()      { return _crc_table_adr; }
 
--- a/src/share/vm/runtime/thread.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/thread.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -834,7 +834,7 @@
   return false;
 }
 
-void Thread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void Thread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   active_handles()->oops_do(f);
   // Do oop for ThreadShadow
   f->do_oop((oop*)&_pending_exception);
@@ -2730,7 +2730,7 @@
   }
 };
 
-void JavaThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void JavaThread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   // Verify that the deferred card marks have been flushed.
   assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
@@ -3253,7 +3253,7 @@
 #endif
 }
 
-void CompilerThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void CompilerThread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   JavaThread::oops_do(f, cld_f, cf);
   if (_scanned_nmethod != NULL && cf != NULL) {
     // Safepoints can occur when the sweeper is scanning an nmethod so
@@ -4167,22 +4167,22 @@
 // uses the Threads_lock to gurantee this property. It also makes sure that
 // all threads gets blocked when exiting or starting).
 
-void Threads::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void Threads::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   ALL_JAVA_THREADS(p) {
     p->oops_do(f, cld_f, cf);
   }
   VMThread::vm_thread()->oops_do(f, cld_f, cf);
 }
 
-void Threads::possibly_parallel_oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void Threads::possibly_parallel_oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   // Introduce a mechanism allowing parallel threads to claim threads as
   // root groups.  Overhead should be small enough to use all the time,
   // even in sequential code.
   SharedHeap* sh = SharedHeap::heap();
   // Cannot yet substitute active_workers for n_par_threads
   // because of G1CollectedHeap::verify() use of
-  // SharedHeap::process_strong_roots().  n_par_threads == 0 will
-  // turn off parallelism in process_strong_roots while active_workers
+  // SharedHeap::process_roots().  n_par_threads == 0 will
+  // turn off parallelism in process_roots while active_workers
   // is being used for parallelism elsewhere.
   bool is_par = sh->n_par_threads() > 0;
   assert(!is_par ||
--- a/src/share/vm/runtime/thread.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/thread.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -472,13 +472,13 @@
   // Apply "cld_f->do_cld" to CLDs that are otherwise not kept alive.
   //   Used by JavaThread::oops_do.
   // Apply "cf->do_code_blob" (if !NULL) to all code blobs active in frames
-  virtual void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  virtual void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
   // Handles the parallel case for the method below.
 private:
   bool claim_oops_do_par_case(int collection_parity);
 public:
-  // Requires that "collection_parity" is that of the current strong roots
+  // Requires that "collection_parity" is that of the current roots
   // iteration.  If "is_par" is false, sets the parity of "this" to
   // "collection_parity", and returns "true".  If "is_par" is true,
   // uses an atomic instruction to set the current threads parity to
@@ -1429,7 +1429,7 @@
   void frames_do(void f(frame*, const RegisterMap*));
 
   // Memory operations
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
   // Sweeper operations
   void nmethods_do(CodeBlobClosure* cf);
@@ -1860,7 +1860,7 @@
   // GC support
   // Apply "f->do_oop" to all root oops in "this".
   // Apply "cf->do_code_blob" (if !NULL) to all code blobs active in frames
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
 #ifndef PRODUCT
 private:
@@ -1927,9 +1927,9 @@
 
   // Apply "f->do_oop" to all root oops in all threads.
   // This version may only be called by sequential code.
-  static void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  static void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
   // This version may be called by sequential or parallel code.
-  static void possibly_parallel_oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  static void possibly_parallel_oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
   // This creates a list of GCTasks, one per thread.
   static void create_thread_roots_tasks(GCTaskQueue* q);
   // This creates a list of GCTasks, one per thread, for marking objects.
--- a/src/share/vm/runtime/vmThread.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/vmThread.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -682,7 +682,7 @@
 }
 
 
-void VMThread::oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf) {
+void VMThread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
   Thread::oops_do(f, cld_f, cf);
   _vm_queue->oops_do(f);
 }
--- a/src/share/vm/runtime/vmThread.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/runtime/vmThread.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -126,7 +126,7 @@
   static VMThread* vm_thread()                    { return _vm_thread; }
 
   // GC support
-  void oops_do(OopClosure* f, CLDToOopClosure* cld_f, CodeBlobClosure* cf);
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
 
   // Debugging
   void print_on(outputStream* st) const;
--- a/src/share/vm/utilities/array.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/utilities/array.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -305,6 +305,7 @@
   friend class MetadataFactory;
   friend class VMStructs;
   friend class MethodHandleCompiler;           // special case
+  friend class WhiteBox;
 protected:
   int _length;                                 // the number of array elements
   T   _data[1];                                // the array memory
@@ -326,6 +327,31 @@
 
   static size_t byte_sizeof(int length) { return sizeof(Array<T>) + MAX2(length - 1, 0) * sizeof(T); }
 
+  // WhiteBox API helper.
+  // Can't distinguish between array of length 0 and length 1,
+  // will always return 0 in those cases.
+  static int bytes_to_length(size_t bytes)       {
+    assert(is_size_aligned(bytes, BytesPerWord), "Must be, for now");
+
+    if (sizeof(Array<T>) >= bytes) {
+      return 0;
+    }
+
+    size_t left = bytes - sizeof(Array<T>);
+    assert(is_size_aligned(left, sizeof(T)), "Must be");
+
+    size_t elements = left / sizeof(T);
+    assert(elements <= (size_t)INT_MAX, err_msg("number of elements " SIZE_FORMAT "doesn't fit into an int.", elements));
+
+    int length = (int)elements;
+
+    assert((size_t)size(length) * BytesPerWord == bytes,
+        err_msg("Expected: " SIZE_FORMAT " got: " SIZE_FORMAT,
+                bytes, (size_t)size(length) * BytesPerWord));
+
+    return length;
+  }
+
   explicit Array(int length) : _length(length) {
     assert(length >= 0, "illegal length");
   }
--- a/src/share/vm/utilities/globalDefinitions.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -558,6 +558,27 @@
   return fabs(value);
 }
 
+//----------------------------------------------------------------------------------------------------
+// Special casts
+// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
+typedef union {
+  jfloat f;
+  jint i;
+} FloatIntConv;
+
+typedef union {
+  jdouble d;
+  jlong l;
+  julong ul;
+} DoubleLongConv;
+
+inline jint    jint_cast    (jfloat  x)  { return ((FloatIntConv*)&x)->i; }
+inline jfloat  jfloat_cast  (jint    x)  { return ((FloatIntConv*)&x)->f; }
+
+inline jlong   jlong_cast   (jdouble x)  { return ((DoubleLongConv*)&x)->l;  }
+inline julong  julong_cast  (jdouble x)  { return ((DoubleLongConv*)&x)->ul; }
+inline jdouble jdouble_cast (jlong   x)  { return ((DoubleLongConv*)&x)->d;  }
+
 inline jint low (jlong value)                    { return jint(value); }
 inline jint high(jlong value)                    { return jint(value >> 32); }
 
--- a/src/share/vm/utilities/globalDefinitions_gcc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/utilities/globalDefinitions_gcc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -167,17 +167,6 @@
 typedef uint32_t juint;
 typedef uint64_t julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-// %%%%%% These seem like standard C++ to me--how about factoring them out? - Ungar
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-inline julong  julong_cast (jdouble x)           { return *(julong* )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
 
 //----------------------------------------------------------------------------------------------------
 // Constant for jlong (specifying an long long canstant is C++ compiler specific)
--- a/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -183,15 +183,6 @@
 typedef unsigned int       juint;
 typedef unsigned long long julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
 
 //----------------------------------------------------------------------------------------------------
 // Constant for jlong (specifying an long long constant is C++ compiler specific)
--- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -116,16 +116,6 @@
 typedef unsigned int     juint;
 typedef unsigned __int64 julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
-
 
 //----------------------------------------------------------------------------------------------------
 // Non-standard stdlib-like stuff:
--- a/src/share/vm/utilities/globalDefinitions_xlc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/utilities/globalDefinitions_xlc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -114,16 +114,6 @@
 typedef uint32_t juint;
 typedef uint64_t julong;
 
-//----------------------------------------------------------------------------------------------------
-// Special (possibly not-portable) casts
-// Cast floats into same-size integers and vice-versa w/o changing bit-pattern
-// %%%%%% These seem like standard C++ to me--how about factoring them out? - Ungar
-
-inline jint    jint_cast   (jfloat  x)           { return *(jint*   )&x; }
-inline jlong   jlong_cast  (jdouble x)           { return *(jlong*  )&x; }
-
-inline jfloat  jfloat_cast (jint    x)           { return *(jfloat* )&x; }
-inline jdouble jdouble_cast(jlong   x)           { return *(jdouble*)&x; }
 
 //----------------------------------------------------------------------------------------------------
 // Constant for jlong (specifying an long long canstant is C++ compiler specific)
--- a/src/share/vm/utilities/growableArray.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/utilities/growableArray.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -147,6 +147,9 @@
   }
 };
 
+template<class E> class GrowableArrayIterator;
+template<class E, class UnaryPredicate> class GrowableArrayFilterIterator;
+
 template<class E> class GrowableArray : public GenericGrowableArray {
   friend class VMStructs;
 
@@ -243,6 +246,14 @@
     return _data[_len-1];
   }
 
+  GrowableArrayIterator<E> begin() const {
+    return GrowableArrayIterator<E>(this, 0);
+  }
+
+  GrowableArrayIterator<E> end() const {
+    return GrowableArrayIterator<E>(this, length());
+  }
+
   void push(const E& elem) { append(elem); }
 
   E pop() {
@@ -412,4 +423,83 @@
     tty->print("}\n");
 }
 
+// Custom STL-style iterator to iterate over GrowableArrays
+// It is constructed by invoking GrowableArray::begin() and GrowableArray::end()
+template<class E> class GrowableArrayIterator : public StackObj {
+  friend class GrowableArray<E>;
+  template<class F, class UnaryPredicate> friend class GrowableArrayFilterIterator;
+
+ private:
+  const GrowableArray<E>* _array; // GrowableArray we iterate over
+  int _position;                  // The current position in the GrowableArray
+
+  // Private constructor used in GrowableArray::begin() and GrowableArray::end()
+  GrowableArrayIterator(const GrowableArray<E>* array, int position) : _array(array), _position(position) {
+    assert(0 <= position && position <= _array->length(), "illegal position");
+  }
+
+ public:
+  GrowableArrayIterator<E>& operator++()  { ++_position; return *this; }
+  E operator*()                           { return _array->at(_position); }
+
+  bool operator==(const GrowableArrayIterator<E>& rhs)  {
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position == rhs._position;
+  }
+
+  bool operator!=(const GrowableArrayIterator<E>& rhs)  {
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position != rhs._position;
+  }
+};
+
+// Custom STL-style iterator to iterate over elements of a GrowableArray that satisfy a given predicate
+template<class E, class UnaryPredicate> class GrowableArrayFilterIterator : public StackObj {
+  friend class GrowableArray<E>;
+
+ private:
+  const GrowableArray<E>* _array;   // GrowableArray we iterate over
+  int _position;                    // Current position in the GrowableArray
+  UnaryPredicate _predicate;        // Unary predicate the elements of the GrowableArray should satisfy
+
+ public:
+  GrowableArrayFilterIterator(const GrowableArrayIterator<E>& begin, UnaryPredicate filter_predicate)
+   : _array(begin._array), _position(begin._position), _predicate(filter_predicate) {
+    // Advance to first element satisfying the predicate
+    while(_position != _array->length() && !_predicate(_array->at(_position))) {
+      ++_position;
+    }
+  }
+
+  GrowableArrayFilterIterator<E, UnaryPredicate>& operator++() {
+    do {
+      // Advance to next element satisfying the predicate
+      ++_position;
+    } while(_position != _array->length() && !_predicate(_array->at(_position)));
+    return *this;
+  }
+
+  E operator*()   { return _array->at(_position); }
+
+  bool operator==(const GrowableArrayIterator<E>& rhs)  {
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position == rhs._position;
+  }
+
+  bool operator!=(const GrowableArrayIterator<E>& rhs)  {
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position != rhs._position;
+  }
+
+  bool operator==(const GrowableArrayFilterIterator<E, UnaryPredicate>& rhs)  {
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position == rhs._position;
+  }
+
+  bool operator!=(const GrowableArrayFilterIterator<E, UnaryPredicate>& rhs)  {
+    assert(_array == rhs._array, "iterator belongs to different array");
+    return _position != rhs._position;
+  }
+};
+
 #endif // SHARE_VM_UTILITIES_GROWABLEARRAY_HPP
--- a/test/TEST.groups	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/TEST.groups	Thu Aug 14 12:55:30 2014 -0700
@@ -169,6 +169,8 @@
 #
 needs_full_vm_compact1 = \
   runtime/NMT \
+  gc/class_unloading/TestCMSClassUnloadingDisabledHWM.java \
+  gc/class_unloading/TestG1ClassUnloadingHWM.java \
   gc/g1/TestRegionAlignment.java \
   gc/g1/TestShrinkToOneRegion.java \
   gc/metaspace/G1AddMetaspaceDependency.java \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/classUnloading/methodUnloading/TestMethodUnloading.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import sun.hotspot.WhiteBox;
+
+import java.lang.reflect.Method;
+import java.net.URL;
+import java.net.URLClassLoader;
+
+/*
+ * @test MethodUnloadingTest
+ * @bug 8029443
+ * @summary "Tests the unloading of methods to to class unloading"
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestMethodUnloading
+ * @build WorkerClass
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-BackgroundCompilation -XX:-UseCompressedOops -XX:+UseParallelGC -XX:CompileOnly=TestMethodUnloading::doWork TestMethodUnloading
+ */
+public class TestMethodUnloading {
+    private static final String workerClassName = "WorkerClass";
+    private static int work = -1;
+
+    private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox();
+    private static int COMP_LEVEL_SIMPLE = 1;
+    private static int COMP_LEVEL_FULL_OPTIMIZATION = 4;
+
+    /**
+     * Does some work by either using the workerClass or locally producing values.
+     * @param workerClass Class performing some work (will be unloaded)
+     * @param useWorker If true the workerClass is used
+     */
+    static private void doWork(Class<?> workerClass, boolean useWorker) throws InstantiationException, IllegalAccessException {
+        if (useWorker) {
+            // Create a new instance
+            Object worker = workerClass.newInstance();
+            // We would like to call a method of WorkerClass here but we cannot cast to WorkerClass
+            // because the class was loaded by a different class loader. One solution would be to use
+            // reflection but since we want C2 to implement the call as an optimized IC we call
+            // Object::hashCode() here which actually calls WorkerClass::hashCode().
+            // C2 will then implement this call as an optimized IC that points to a to-interpreter stub
+            // referencing the Method* for WorkerClass::hashCode().
+            work = worker.hashCode();
+            if (work != 42) {
+                new RuntimeException("Work not done");
+            }
+        } else {
+            // Do some important work here
+            work = 1;
+        }
+    }
+
+    /**
+     * Makes sure that method is compiled by forcing compilation if not yet compiled.
+     * @param m Method to be checked
+     */
+    static private void makeSureIsCompiled(Method m) {
+        // Make sure background compilation is disabled
+        if (WHITE_BOX.getBooleanVMFlag("BackgroundCompilation")) {
+            throw new RuntimeException("Background compilation enabled");
+        }
+
+        // Check if already compiled
+        if (!WHITE_BOX.isMethodCompiled(m)) {
+            // If not, try to compile it with C2
+            if(!WHITE_BOX.enqueueMethodForCompilation(m, COMP_LEVEL_FULL_OPTIMIZATION)) {
+                // C2 compiler not available, try to compile with C1
+                WHITE_BOX.enqueueMethodForCompilation(m, COMP_LEVEL_SIMPLE);
+            }
+            // Because background compilation is disabled, method should now be compiled
+            if(!WHITE_BOX.isMethodCompiled(m)) {
+                throw new RuntimeException(m + " not compiled");
+            }
+        }
+    }
+
+    /**
+     * This test creates stale Method* metadata in a to-interpreter stub of an optimized IC.
+     *
+     * The following steps are performed:
+     * (1) A workerClass is loaded by a custom class loader
+     * (2) The method doWork that calls a method of the workerClass is compiled. The call
+     *     is implemented as an optimized IC calling a to-interpreted stub. The to-interpreter
+     *     stub contains a Method* to a workerClass method.
+     * (3) Unloading of the workerClass is enforced. The to-interpreter stub now contains a dead Method*.
+     * (4) Depending on the implementation of the IC, the compiled version of doWork should still be
+     *     valid. We call it again without using the workerClass.
+     */
+    static public void main(String[] args) throws Exception {
+        // (1) Create a custom class loader with no parent class loader
+        URL url = TestMethodUnloading.class.getProtectionDomain().getCodeSource().getLocation();
+        URLClassLoader loader = new URLClassLoader(new URL[] {url}, null);
+
+        // Load worker class with custom class loader
+        Class<?> workerClass = Class.forName(workerClassName, true, loader);
+
+        // (2) Make sure all paths of doWork are profiled and compiled
+        for (int i = 0; i < 100000; ++i) {
+            doWork(workerClass, true);
+            doWork(workerClass, false);
+        }
+
+        // Make sure doWork is compiled now
+        Method doWork = TestMethodUnloading.class.getDeclaredMethod("doWork", Class.class, boolean.class);
+        makeSureIsCompiled(doWork);
+
+        // (3) Throw away class loader and reference to workerClass to allow unloading
+        loader.close();
+        loader = null;
+        workerClass = null;
+
+        // Force garbage collection to trigger unloading of workerClass
+        // Dead reference to WorkerClass::hashCode triggers JDK-8029443
+        WHITE_BOX.fullGC();
+
+        // (4) Depending on the implementation of the IC, the compiled version of doWork
+        // may still be valid here. Execute it without a workerClass.
+        doWork(null, false);
+        if (work != 1) {
+            throw new RuntimeException("Work not done");
+        }
+
+        doWork(Object.class, false);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/classUnloading/methodUnloading/WorkerClass.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * Worker class that is dynamically loaded/unloaded by TestMethodUnloading.
+ */
+public class WorkerClass {
+    /**
+     * We override hashCode here to be able to access this implementation
+     * via an Object reference (we cannot cast to WorkerClass).
+     */
+    @Override
+    public int hashCode() {
+        return 42;
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/sha/TestSHA.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8035968
+ * @summary C2 support for SHA on SPARC
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 TestSHA
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-224 -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-384 -Doffset=1 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-512 -Doffset=1 TestSHA
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=SHA-256 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=SHA-512 TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-256 -Dalgorithm2=SHA-512 TestSHA
+ *
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=SHA-1   -Dalgorithm2=MD5     TestSHA
+ * @run main/othervm/timeout=600 -Xbatch -Dalgorithm=MD5     -Dalgorithm2=SHA-1   TestSHA
+ */
+
+import java.security.MessageDigest;
+import java.util.Arrays;
+
+public class TestSHA {
+    private static final int HASH_LEN = 64; /* up to 512-bit */
+    private static final int ALIGN = 8;     /* for different data alignments */
+
+    public static void main(String[] args) throws Exception {
+        String provider = System.getProperty("provider", "SUN");
+        String algorithm = System.getProperty("algorithm", "SHA-1");
+        String algorithm2 = System.getProperty("algorithm2", "");
+        int msgSize = Integer.getInteger("msgSize", 1024);
+        int offset = Integer.getInteger("offset", 0)  % ALIGN;
+        int iters = (args.length > 0 ? Integer.valueOf(args[0]) : 100000);
+        int warmupIters = (args.length > 1 ? Integer.valueOf(args[1]) : 20000);
+
+        testSHA(provider, algorithm, msgSize, offset, iters, warmupIters);
+
+        if (algorithm2.equals("") == false) {
+            testSHA(provider, algorithm2, msgSize, offset, iters, warmupIters);
+        }
+    }
+
+    static void testSHA(String provider, String algorithm, int msgSize,
+                        int offset, int iters, int warmupIters) throws Exception {
+        System.out.println("provider = " + provider);
+        System.out.println("algorithm = " + algorithm);
+        System.out.println("msgSize = " + msgSize + " bytes");
+        System.out.println("offset = " + offset);
+        System.out.println("iters = " + iters);
+
+        byte[] expectedHash = new byte[HASH_LEN];
+        byte[] hash = new byte[HASH_LEN];
+        byte[] data = new byte[msgSize + offset];
+        for (int i = 0; i < (msgSize + offset); i++) {
+            data[i] = (byte)(i & 0xff);
+        }
+
+        try {
+            MessageDigest sha = MessageDigest.getInstance(algorithm, provider);
+
+            /* do once, which doesn't use intrinsics */
+            sha.reset();
+            sha.update(data, offset, msgSize);
+            expectedHash = sha.digest();
+
+            /* warm up */
+            for (int i = 0; i < warmupIters; i++) {
+                sha.reset();
+                sha.update(data, offset, msgSize);
+                hash = sha.digest();
+            }
+
+            /* check result */
+            if (Arrays.equals(hash, expectedHash) == false) {
+                System.out.println("TestSHA Error: ");
+                showArray(expectedHash, "expectedHash");
+                showArray(hash,         "computedHash");
+                //System.exit(1);
+                throw new Exception("TestSHA Error");
+            } else {
+                showArray(hash, "hash");
+            }
+
+            /* measure performance */
+            long start = System.nanoTime();
+            for (int i = 0; i < iters; i++) {
+                sha.reset();
+                sha.update(data, offset, msgSize);
+                hash = sha.digest();
+            }
+            long end = System.nanoTime();
+            double total = (double)(end - start)/1e9;         /* in seconds */
+            double thruput = (double)msgSize*iters/1e6/total; /* in MB/s */
+            System.out.println("TestSHA runtime = " + total + " seconds");
+            System.out.println("TestSHA throughput = " + thruput + " MB/s");
+            System.out.println();
+        } catch (Exception e) {
+            System.out.println("Exception: " + e);
+            //System.exit(1);
+            throw new Exception(e);
+        }
+    }
+
+    static void showArray(byte b[], String name) {
+        System.out.format("%s [%d]: ", name, b.length);
+        for (int i = 0; i < Math.min(b.length, HASH_LEN); i++) {
+            System.out.format("%02x ", b[i] & 0xff);
+        }
+        System.out.println();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/osr/TestOSRWithNonEmptyStack.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+import jdk.internal.org.objectweb.asm.ClassWriter;
+import jdk.internal.org.objectweb.asm.Label;
+import jdk.internal.org.objectweb.asm.MethodVisitor;
+import static jdk.internal.org.objectweb.asm.Opcodes.*;
+
+/**
+ * @test
+ * @bug 8051344
+ * @summary Force OSR compilation with non-empty stack at the OSR entry point.
+ * @compile -XDignore.symbol.file TestOSRWithNonEmptyStack.java
+ * @run main/othervm -XX:CompileOnly=TestCase.test TestOSRWithNonEmptyStack
+ */
+public class TestOSRWithNonEmptyStack extends ClassLoader {
+    private static final int CLASS_FILE_VERSION = 52;
+    private static final String CLASS_NAME = "TestCase";
+    private static final String METHOD_NAME = "test";
+    private static final int ITERATIONS = 1_000_000;
+
+    private static byte[] generateTestClass() {
+        ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES);
+
+        cw.visit(TestOSRWithNonEmptyStack.CLASS_FILE_VERSION, ACC_PUBLIC,
+                TestOSRWithNonEmptyStack.CLASS_NAME, null, "java/lang/Object",
+                null);
+
+        TestOSRWithNonEmptyStack.generateConstructor(cw);
+        TestOSRWithNonEmptyStack.generateTestMethod(cw);
+
+        cw.visitEnd();
+        return cw.toByteArray();
+    }
+
+    private static void generateConstructor(ClassWriter classWriter) {
+        MethodVisitor mv = classWriter.visitMethod(ACC_PUBLIC, "<init>", "()V",
+                null, null);
+
+        mv.visitCode();
+
+        mv.visitVarInsn(ALOAD, 0);
+        mv.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V",
+                false);
+        mv.visitInsn(RETURN);
+
+        mv.visitMaxs(0, 0);
+        mv.visitEnd();
+    }
+
+    private static void generateTestMethod(ClassWriter classWriter) {
+        MethodVisitor mv = classWriter.visitMethod(ACC_PUBLIC,
+                TestOSRWithNonEmptyStack.METHOD_NAME, "()V", null, null);
+        Label osrEntryPoint = new Label();
+
+        mv.visitCode();
+        // Push 'this' into stack before OSR entry point to bail out compilation
+        mv.visitVarInsn(ALOAD, 0);
+        // Setup loop counter
+        mv.visitInsn(ICONST_0);
+        mv.visitVarInsn(ISTORE, 1);
+        // Begin loop
+        mv.visitLabel(osrEntryPoint);
+        // Increment loop counter
+        mv.visitVarInsn(ILOAD, 1);
+        mv.visitInsn(ICONST_1);
+        mv.visitInsn(IADD);
+        // Duplicate it for loop condition check
+        mv.visitInsn(DUP);
+        mv.visitVarInsn(ISTORE, 1);
+        // Check loop condition
+        mv.visitLdcInsn(TestOSRWithNonEmptyStack.ITERATIONS);
+        mv.visitJumpInsn(IF_ICMPLT, osrEntryPoint);
+        // Pop 'this'.
+        mv.visitInsn(POP);
+        mv.visitInsn(RETURN);
+
+        mv.visitMaxs(0, 0);
+        mv.visitEnd();
+    }
+
+    private void run() {
+        byte[] bytecode = TestOSRWithNonEmptyStack.generateTestClass();
+
+        try {
+            Class klass = defineClass(TestOSRWithNonEmptyStack.CLASS_NAME,
+                    bytecode, 0, bytecode.length);
+
+            Constructor ctor = klass.getConstructor();
+            Method method = klass.getDeclaredMethod(
+                    TestOSRWithNonEmptyStack.METHOD_NAME);
+
+            Object testCase = ctor.newInstance();
+            method.invoke(testCase);
+        } catch (Exception e) {
+            throw new RuntimeException(
+                    "Test bug: generated class should be valid.", e);
+        }
+    }
+
+    public static void main(String args[]) {
+        new TestOSRWithNonEmptyStack().run();
+    }
+}
--- a/test/compiler/rtm/cli/TestRTMRetryCountOption.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/compiler/rtm/cli/TestRTMRetryCountOption.java	Thu Aug 14 12:55:30 2014 -0700
@@ -35,7 +35,7 @@
     private static final String DEFAULT_VALUE = "5";
 
     private TestRTMRetryCountOption() {
-        super(Boolean.TRUE::booleanValue, "RTMRetryCount", false, true,
+        super(Boolean.TRUE::booleanValue, "RTMRetryCount", false, false,
                 TestRTMRetryCountOption.DEFAULT_VALUE,
                 "0", "10", "100", "1000");
     }
--- a/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnSupportedConfig.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnSupportedConfig.java	Thu Aug 14 12:55:30 2014 -0700
@@ -50,38 +50,25 @@
 
     @Override
     public void runTestCases() throws Throwable {
-        String experimentalOptionError
-                = CommandLineOptionTest.getExperimentalOptionErrorMessage(
-                "UseRTMDeopt");
-        // verify that option is experimental
+        // verify that option could be turned on
+        CommandLineOptionTest.verifySameJVMStartup(
+                null, null, ExitCode.OK, "-XX:+UseRTMDeopt");
+        // verify that option could be turned off
         CommandLineOptionTest.verifySameJVMStartup(
-                new String[] { experimentalOptionError }, null, ExitCode.FAIL,
-                "-XX:+UseRTMDeopt");
-        // verify that option could be turned on
-        CommandLineOptionTest.verifySameJVMStartup(null, null, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMDeopt");
-        // verify that option could be turned off
-        CommandLineOptionTest.verifySameJVMStartup(null, null, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:-UseRTMDeopt");
+                null, null, ExitCode.OK, "-XX:-UseRTMDeopt");
+        // verify default value
+        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
+                TestUseRTMDeoptOptionOnSupportedConfig.DEFAULT_VALUE);
         // verify default value
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
                 TestUseRTMDeoptOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
-        // verify default value
-        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
-                TestUseRTMDeoptOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking");
         // verify that option is off when UseRTMLocking is off
-        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt", "false",
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:-UseRTMLocking", "-XX:+UseRTMDeopt");
+        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
+                "false", "-XX:-UseRTMLocking", "-XX:+UseRTMDeopt");
         // verify that option could be turned on
-        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt", "true",
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking", "-XX:+UseRTMDeopt");
+        CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMDeopt",
+                "true", "-XX:+UseRTMLocking", "-XX:+UseRTMDeopt");
     }
 
     public static void main(String args[]) throws Throwable {
--- a/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnUnsupportedConfig.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/compiler/rtm/cli/TestUseRTMDeoptOptionOnUnsupportedConfig.java	Thu Aug 14 12:55:30 2014 -0700
@@ -48,7 +48,7 @@
     private TestUseRTMDeoptOptionOnUnsupportedConfig() {
         super(new NotPredicate(new AndPredicate(new SupportedCPU(),
                         new SupportedVM())),
-                "UseRTMDeopt", true, true,
+                "UseRTMDeopt", true, false,
                 TestUseRTMDeoptOptionOnUnsupportedConfig.DEFAULT_VALUE, "true");
     }
 
@@ -57,14 +57,11 @@
         super.verifyJVMStartup();
         // verify default value
         CommandLineOptionTest.verifyOptionValueForSameVM(optionName,
-                defaultValue,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                defaultValue);
         // verify that until RTMLocking is not used, value
         // will be set to default false.
         CommandLineOptionTest.verifyOptionValueForSameVM(optionName,
-                defaultValue,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMDeopt");
+                defaultValue, "-XX:+UseRTMDeopt");
     }
 
     public static void main(String args[]) throws Throwable {
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionOnSupportedConfig.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionOnSupportedConfig.java	Thu Aug 14 12:55:30 2014 -0700
@@ -51,43 +51,31 @@
     @Override
     public void runTestCases() throws Throwable {
         String unrecongnizedOption
-                = CommandLineOptionTest.getUnrecognizedOptionErrorMessage(
-                "UseRTMLocking");
-        String experimentalOptionError
-                = CommandLineOptionTest.getExperimentalOptionErrorMessage(
+                =  CommandLineOptionTest.getUnrecognizedOptionErrorMessage(
                 "UseRTMLocking");
-        // verify that options is experimental
-        CommandLineOptionTest.verifySameJVMStartup(
-                new String[] { experimentalOptionError }, null, ExitCode.FAIL,
-                "-XX:+UseRTMLocking");
         // verify that there are no warning or error in VM output
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[]{
                         RTMGenericCommandLineOptionTest.RTM_INSTR_ERROR,
                         unrecongnizedOption
-                }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking");
+                }, ExitCode.OK, "-XX:+UseRTMLocking"
+        );
 
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[]{
                         RTMGenericCommandLineOptionTest.RTM_INSTR_ERROR,
                         unrecongnizedOption
-                }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:-UseRTMLocking");
+                }, ExitCode.OK, "-XX:-UseRTMLocking"
+        );
         // verify that UseRTMLocking is of by default
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                TestUseRTMLockingOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                TestUseRTMLockingOptionOnSupportedConfig.DEFAULT_VALUE);
         // verify that we can change UseRTMLocking value
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
                 TestUseRTMLockingOptionOnSupportedConfig.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:-UseRTMLocking");
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                "true", CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking");
+                "true", "-XX:+UseRTMLocking");
     }
 
     public static void main(String args[]) throws Throwable {
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedCPU.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedCPU.java	Thu Aug 14 12:55:30 2014 -0700
@@ -63,9 +63,7 @@
             CommandLineOptionTest.verifySameJVMStartup(
                     new String[] { errorMessage },
                     new String[] { unrecongnizedOption },
-                    ExitCode.FAIL,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:+UseRTMLocking");
+                    ExitCode.FAIL, "-XX:+UseRTMLocking");
             // verify that we can pass -UseRTMLocking without
             // getting any error messages
             CommandLineOptionTest.verifySameJVMStartup(
@@ -73,27 +71,20 @@
                     new String[]{
                             errorMessage,
                             unrecongnizedOption
-                    }, ExitCode.OK,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:-UseRTMLocking");
+                    }, ExitCode.OK, "-XX:-UseRTMLocking");
 
             // verify that UseRTMLocking is false by default
             CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                    TestUseRTMLockingOptionOnUnsupportedCPU.DEFAULT_VALUE,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                    TestUseRTMLockingOptionOnUnsupportedCPU.DEFAULT_VALUE);
         } else {
             // verify that on non-x86 CPUs RTMLocking could not be used
             CommandLineOptionTest.verifySameJVMStartup(
                     new String[] { unrecongnizedOption },
-                    null, ExitCode.FAIL,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:+UseRTMLocking");
+                    null, ExitCode.FAIL, "-XX:+UseRTMLocking");
 
             CommandLineOptionTest.verifySameJVMStartup(
                     new String[] { unrecongnizedOption },
-                    null, ExitCode.FAIL,
-                    CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                    "-XX:-UseRTMLocking");
+                    null, ExitCode.FAIL, "-XX:-UseRTMLocking");
         }
     }
 
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedVM.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionOnUnsupportedVM.java	Thu Aug 14 12:55:30 2014 -0700
@@ -53,27 +53,17 @@
     public void runTestCases() throws Throwable {
         String errorMessage
                 = RTMGenericCommandLineOptionTest.RTM_UNSUPPORTED_VM_ERROR;
-        String experimentalOptionError
-                = CommandLineOptionTest.getExperimentalOptionErrorMessage(
-                "UseRTMLocking");
-        // verify that options is experimental
-        CommandLineOptionTest.verifySameJVMStartup(
-                new String[] { experimentalOptionError }, null, ExitCode.FAIL,
-                "-XX:+UseRTMLocking");
         // verify that we can't use +UseRTMLocking
         CommandLineOptionTest.verifySameJVMStartup(
                 new String[] { errorMessage }, null, ExitCode.FAIL,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking");
         // verify that we can turn it off
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[] { errorMessage }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:-UseRTMLocking");
         // verify that it is off by default
         CommandLineOptionTest.verifyOptionValueForSameVM("UseRTMLocking",
-                TestUseRTMLockingOptionOnUnsupportedVM.DEFAULT_VALUE,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS);
+                TestUseRTMLockingOptionOnUnsupportedVM.DEFAULT_VALUE);
     }
 
     public static void main(String args[]) throws Throwable {
--- a/test/compiler/rtm/cli/TestUseRTMLockingOptionWithBiasedLocking.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/compiler/rtm/cli/TestUseRTMLockingOptionWithBiasedLocking.java	Thu Aug 14 12:55:30 2014 -0700
@@ -53,22 +53,18 @@
         // verify that we will not get a warning
         CommandLineOptionTest.verifySameJVMStartup(null,
                 new String[] { warningMessage }, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking", "-XX:-UseBiasedLocking");
         // verify that we will get a warning
         CommandLineOptionTest.verifySameJVMStartup(
                 new String[] { warningMessage }, null, ExitCode.OK,
-                CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
                 "-XX:+UseRTMLocking", "-XX:+UseBiasedLocking");
         // verify that UseBiasedLocking is false when we use rtm locking
         CommandLineOptionTest.verifyOptionValueForSameVM("UseBiasedLocking",
-                "false", CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking");
+                "false", "-XX:+UseRTMLocking");
         // verify that we can't turn on biased locking when
         // using rtm locking
         CommandLineOptionTest.verifyOptionValueForSameVM("UseBiasedLocking",
-                "false", CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS,
-                "-XX:+UseRTMLocking", "-XX:+UseBiasedLocking");
+                "false", "-XX:+UseRTMLocking", "-XX:+UseBiasedLocking");
     }
 
     public static void main(String args[]) throws Throwable {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/class_unloading/AllocateBeyondMetaspaceSize.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import sun.hotspot.WhiteBox;
+
+class AllocateBeyondMetaspaceSize {
+  public static Object dummy;
+
+  public static void main(String [] args) {
+    if (args.length != 2) {
+      throw new IllegalArgumentException("Usage: <MetaspaceSize> <YoungGenSize>");
+    }
+
+    long metaspaceSize = Long.parseLong(args[0]);
+    long youngGenSize = Long.parseLong(args[1]);
+
+    run(metaspaceSize, youngGenSize);
+  }
+
+  private static void run(long metaspaceSize, long youngGenSize) {
+    WhiteBox wb = WhiteBox.getWhiteBox();
+
+    long allocationBeyondMetaspaceSize  = metaspaceSize * 2;
+    long metaspace = wb.allocateMetaspace(null, allocationBeyondMetaspaceSize);
+
+    triggerYoungGC(youngGenSize);
+
+    wb.freeMetaspace(null, metaspace, metaspace);
+  }
+
+  private static void triggerYoungGC(long youngGenSize) {
+    long approxAllocSize = 32 * 1024;
+    long numAllocations  = 2 * youngGenSize / approxAllocSize;
+
+    for (long i = 0; i < numAllocations; i++) {
+      dummy = new byte[(int)approxAllocSize];
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/class_unloading/TestCMSClassUnloadingEnabledHWM.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @key gc
+ * @bug 8049831
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestCMSClassUnloadingEnabledHWM AllocateBeyondMetaspaceSize
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run driver TestCMSClassUnloadingEnabledHWM
+ * @summary Test that -XX:-CMSClassUnloadingEnabled will trigger a Full GC when more than MetaspaceSize metadata is allocated.
+ */
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+public class TestCMSClassUnloadingEnabledHWM {
+  private static long MetaspaceSize = 32 * 1024 * 1024;
+  private static long YoungGenSize  = 32 * 1024 * 1024;
+
+  private static OutputAnalyzer run(boolean enableUnloading) throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+      "-Xbootclasspath/a:.",
+      "-XX:+WhiteBoxAPI",
+      "-XX:MetaspaceSize=" + MetaspaceSize,
+      "-Xmn" + YoungGenSize,
+      "-XX:+UseConcMarkSweepGC",
+      "-XX:" + (enableUnloading ? "+" : "-") + "CMSClassUnloadingEnabled",
+      "-XX:+PrintHeapAtGC",
+      "-XX:+PrintGCDetails",
+      "AllocateBeyondMetaspaceSize",
+      "" + MetaspaceSize,
+      "" + YoungGenSize);
+    return new OutputAnalyzer(pb.start());
+  }
+
+  public static OutputAnalyzer runWithCMSClassUnloading() throws Exception {
+    return run(true);
+  }
+
+  public static OutputAnalyzer runWithoutCMSClassUnloading() throws Exception {
+    return run(false);
+  }
+
+  public static void testWithoutCMSClassUnloading() throws Exception {
+    // -XX:-CMSClassUnloadingEnabled is used, so we expect a full GC instead of a concurrent cycle.
+    OutputAnalyzer out = runWithoutCMSClassUnloading();
+
+    out.shouldMatch(".*Full GC.*");
+    out.shouldNotMatch(".*CMS Initial Mark.*");
+  }
+
+  public static void testWithCMSClassUnloading() throws Exception {
+    // -XX:+CMSClassUnloadingEnabled is used, so we expect a concurrent cycle instead of a full GC.
+    OutputAnalyzer out = runWithCMSClassUnloading();
+
+    out.shouldMatch(".*CMS Initial Mark.*");
+    out.shouldNotMatch(".*Full GC.*");
+  }
+
+  public static void main(String args[]) throws Exception {
+    testWithCMSClassUnloading();
+    testWithoutCMSClassUnloading();
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/class_unloading/TestG1ClassUnloadingHWM.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @key gc
+ * @bug 8049831
+ * @library /testlibrary /testlibrary/whitebox
+ * @build TestG1ClassUnloadingHWM AllocateBeyondMetaspaceSize
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ * @run driver TestG1ClassUnloadingHWM
+ * @summary Test that -XX:-ClassUnloadingWithConcurrentMark will trigger a Full GC when more than MetaspaceSize metadata is allocated.
+ */
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+public class TestG1ClassUnloadingHWM {
+  private static long MetaspaceSize = 32 * 1024 * 1024;
+  private static long YoungGenSize  = 32 * 1024 * 1024;
+
+  private static OutputAnalyzer run(boolean enableUnloading) throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+      "-Xbootclasspath/a:.",
+      "-XX:+WhiteBoxAPI",
+      "-XX:MetaspaceSize=" + MetaspaceSize,
+      "-Xmn" + YoungGenSize,
+      "-XX:+UseG1GC",
+      "-XX:" + (enableUnloading ? "+" : "-") + "ClassUnloadingWithConcurrentMark",
+      "-XX:+PrintHeapAtGC",
+      "-XX:+PrintGCDetails",
+      "AllocateBeyondMetaspaceSize",
+      "" + MetaspaceSize,
+      "" + YoungGenSize);
+    return new OutputAnalyzer(pb.start());
+  }
+
+  public static OutputAnalyzer runWithG1ClassUnloading() throws Exception {
+    return run(true);
+  }
+
+  public static OutputAnalyzer runWithoutG1ClassUnloading() throws Exception {
+    return run(false);
+  }
+
+  public static void testWithoutG1ClassUnloading() throws Exception {
+    // -XX:-ClassUnloadingWithConcurrentMark is used, so we expect a full GC instead of a concurrent cycle.
+    OutputAnalyzer out = runWithoutG1ClassUnloading();
+
+    out.shouldMatch(".*Full GC.*");
+    out.shouldNotMatch(".*initial-mark.*");
+  }
+
+  public static void testWithG1ClassUnloading() throws Exception {
+    // -XX:+ClassUnloadingWithConcurrentMark is used, so we expect a concurrent cycle instead of a full GC.
+    OutputAnalyzer out = runWithG1ClassUnloading();
+
+    out.shouldMatch(".*initial-mark.*");
+    out.shouldNotMatch(".*Full GC.*");
+  }
+
+  public static void main(String args[]) throws Exception {
+    testWithG1ClassUnloading();
+    testWithoutG1ClassUnloading();
+  }
+}
+
--- a/test/gc/g1/TestDeferredRSUpdate.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/gc/g1/TestDeferredRSUpdate.java	Thu Aug 14 12:55:30 2014 -0700
@@ -23,7 +23,7 @@
 
 /*
  * @test TestDeferredRSUpdate
- * @bug 8040977
+ * @bug 8040977 8052170
  * @summary Ensure that running with -XX:-G1DeferredRSUpdate does not crash the VM
  * @key gc
  * @library /testlibrary
@@ -38,6 +38,7 @@
 
     ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
                                                               "-Xmx10M",
+                                                              "-XX:+PrintGCDetails",
                                                               // G1DeferredRSUpdate is a develop option, but we cannot limit execution of this test to only debug VMs.
                                                               "-XX:+IgnoreUnrecognizedVMOptions",
                                                               "-XX:-G1DeferredRSUpdate",
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestEagerReclaimHumongousRegions.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestEagerReclaimHumongousRegions
+ * @bug 8027959
+ * @summary Test to make sure that eager reclaim of humongous objects work. We simply try to fill
+ * up the heap with humongous objects that should be eagerly reclaimable to avoid Full GC.
+ * @key gc
+ * @library /testlibrary
+ */
+
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+import java.util.LinkedList;
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.Asserts;
+
+class ReclaimRegionFast {
+    public static final int M = 1024*1024;
+
+    public static LinkedList<Object> garbageList = new LinkedList<Object>();
+
+    public static void genGarbage() {
+        for (int i = 0; i < 32*1024; i++) {
+            garbageList.add(new int[100]);
+        }
+        garbageList.clear();
+    }
+
+    // A large object referenced by a static.
+    static int[] filler = new int[10 * M];
+
+    public static void main(String[] args) {
+
+        int[] large = new int[M];
+
+        Object ref_from_stack = large;
+
+        for (int i = 0; i < 100; i++) {
+            // A large object that will be reclaimed eagerly.
+            large = new int[6*M];
+            genGarbage();
+            // Make sure that the compiler cannot completely remove
+            // the allocation of the large object until here.
+            System.out.println(large);
+        }
+
+        // Keep the reference to the first object alive.
+        System.out.println(ref_from_stack);
+    }
+}
+
+public class TestEagerReclaimHumongousRegions {
+    public static void main(String[] args) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            "-XX:+UseG1GC",
+            "-Xms128M",
+            "-Xmx128M",
+            "-Xmn16M",
+            "-XX:+PrintGC",
+            ReclaimRegionFast.class.getName());
+
+        Pattern p = Pattern.compile("Full GC");
+
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+        int found = 0;
+        Matcher m = p.matcher(output.getStdout());
+        while (m.find()) { found++; }
+        System.out.println("Issued " + found + " Full GCs");
+        Asserts.assertLT(found, 10, "Found that " + found + " Full GCs were issued. This is larger than the bound. Eager reclaim seems to not work at all");
+
+        output.shouldHaveExitValue(0);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestEagerReclaimHumongousRegions2.java	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestEagerReclaimHumongousRegions2
+ * @bug 8051973
+ * @summary Test to make sure that eager reclaim of humongous objects correctly clears
+ * mark bitmaps at reclaim.
+ * @key gc
+ * @library /testlibrary
+ */
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.Random;
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+
+// An object that has a few references to other instances to slow down marking.
+class ObjectWithSomeRefs {
+    public ObjectWithSomeRefs other1;
+    public ObjectWithSomeRefs other2;
+    public ObjectWithSomeRefs other3;
+    public ObjectWithSomeRefs other4;
+}
+
+class ReclaimRegionFast {
+    public static final int M = 1024*1024;
+
+    public static LinkedList<Object> garbageList = new LinkedList<Object>();
+
+    public static void genGarbage(Object large) {
+        for (int i = 0; i < 64*1024; i++) {
+            Object[] garbage = new Object[50];
+            garbage[0] = large;
+            garbageList.add(garbage);
+        }
+        garbageList.clear();
+    }
+
+    public static ArrayList<ObjectWithSomeRefs> longList = new ArrayList<ObjectWithSomeRefs>();
+
+    public static void main(String[] args) {
+
+        for (int i = 0; i < 16*1024; i++) {
+             longList.add(new ObjectWithSomeRefs());
+        }
+
+        Random rnd = new Random();
+        for (int i = 0; i < longList.size(); i++) {
+             int len = longList.size();
+             longList.get(i).other1 = longList.get(rnd.nextInt(len));
+             longList.get(i).other2 = longList.get(rnd.nextInt(len));
+             longList.get(i).other3 = longList.get(rnd.nextInt(len));
+             longList.get(i).other4 = longList.get(rnd.nextInt(len));
+        }
+
+        int[] large1 = new int[M];
+        int[] large2 = null;
+        int[] large3 = null;
+        int[] large4 = null;
+
+        Object ref_from_stack = large1;
+
+        for (int i = 0; i < 20; i++) {
+            // A set of large objects that will be reclaimed eagerly - and hopefully marked.
+            large1 = new int[M - 20];
+            large2 = new int[M - 20];
+            large3 = new int[M - 20];
+            large4 = new int[M - 20];
+            genGarbage(large1);
+            // Make sure that the compiler cannot completely remove
+            // the allocation of the large object until here.
+            System.out.println(large1 + " " + large2 + " " + large3 + " " + large4);
+        }
+
+        // Keep the reference to the first object alive.
+        System.out.println(ref_from_stack);
+    }
+}
+
+public class TestEagerReclaimHumongousRegions2 {
+    public static void main(String[] args) throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            "-XX:+UseG1GC",
+            "-Xms128M",
+            "-Xmx128M",
+            "-Xmn2M",
+            "-XX:G1HeapRegionSize=1M",
+            "-XX:InitiatingHeapOccupancyPercent=0", // Want to have as much as possible initial marks.
+            "-XX:+PrintGC",
+            "-XX:+VerifyAfterGC",
+            "-XX:ConcGCThreads=1", // Want to make marking as slow as possible.
+            "-XX:+IgnoreUnrecognizedVMOptions", // G1VerifyBitmaps is develop only.
+            "-XX:+G1VerifyBitmaps",
+            ReclaimRegionFast.class.getName());
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        output.shouldHaveExitValue(0);
+    }
+}
+
--- a/test/gc/g1/TestGCLogMessages.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/gc/g1/TestGCLogMessages.java	Thu Aug 14 12:55:30 2014 -0700
@@ -23,7 +23,7 @@
 
 /*
  * @test TestGCLogMessages
- * @bug 8035406 8027295 8035398 8019342
+ * @bug 8035406 8027295 8035398 8019342 8027959
  * @summary Ensure that the PrintGCDetails output for a minor GC with G1
  * includes the expected necessary messages.
  * @key gc
@@ -54,6 +54,7 @@
     output.shouldNotContain("[String Dedup Fixup");
     output.shouldNotContain("[Young Free CSet");
     output.shouldNotContain("[Non-Young Free CSet");
+    output.shouldNotContain("[Humongous Reclaim");
     output.shouldHaveExitValue(0);
 
     pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
@@ -71,6 +72,10 @@
     output.shouldContain("[String Dedup Fixup");
     output.shouldNotContain("[Young Free CSet");
     output.shouldNotContain("[Non-Young Free CSet");
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldNotContain("[Humongous Total");
+    output.shouldNotContain("[Humongous Candidate");
+    output.shouldNotContain("[Humongous Reclaimed");
     output.shouldHaveExitValue(0);
 
     pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
@@ -90,6 +95,10 @@
     output.shouldContain("[String Dedup Fixup");
     output.shouldContain("[Young Free CSet");
     output.shouldContain("[Non-Young Free CSet");
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldContain("[Humongous Total");
+    output.shouldContain("[Humongous Candidate");
+    output.shouldContain("[Humongous Reclaimed");
     output.shouldHaveExitValue(0);
   }
 
--- a/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/test/testlibrary/whitebox/sun/hotspot/WhiteBox.java	Thu Aug 14 12:55:30 2014 -0700
@@ -142,6 +142,8 @@
 
   // Memory
   public native void readReservedMemory();
+  public native long allocateMetaspace(ClassLoader classLoader, long size);
+  public native void freeMetaspace(ClassLoader classLoader, long addr, long size);
 
   // force Full GC
   public native void fullGC();