# HG changeset patch
# User kamg
# Date 1326853528 18000
# Node ID 6520f9861937895808a236eee58a7166db8e6dfd
# Parent  d7e3846464d015a7fdcdaafed2470d689cf0e04e# Parent  f1cd52d6ce02e91ac9ea7f10f3ca6c82a9e52a38
Merge

diff -r d7e3846464d0 -r 6520f9861937 .hgtags
--- a/.hgtags	Tue Jan 17 13:08:52 2012 -0500
+++ b/.hgtags	Tue Jan 17 21:25:28 2012 -0500
@@ -207,3 +207,7 @@
 a2fef924d8e6f37dac2a887315e3502876cc8e24 hs23-b08
 61165f53f1656b9f99e4fb806429bf98b99d59c3 jdk8-b18
 4bcf61041217f8677dcec18e90e9196acc945bba hs23-b09
+9232e0ecbc2cec54dcc8f93004fb00c214446460 jdk8-b19
+fe2c8764998112b7fefcd7d41599714813ae4327 jdk8-b20
+9952d1c439d64c5fd4ad1236a63a62bd5a49d4c3 jdk8-b21
+513351373923f74a7c91755748b95c9771e59f96 hs23-b10
diff -r d7e3846464d0 -r 6520f9861937 agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Tue Jan 17 13:08:52 2012 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Tue Jan 17 21:25:28 2012 -0500
@@ -63,8 +63,6 @@
   private static int CLASS_STATE_FULLY_INITIALIZED;
   private static int CLASS_STATE_INITIALIZATION_ERROR;
 
-  private static int IS_MARKED_DEPENDENT_MASK;
-
   private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
     Type type            = db.lookupType("instanceKlass");
     arrayKlasses         = new OopField(type.getOopField("_array_klasses"), Oop.getHeaderSize());
@@ -92,7 +90,7 @@
     staticFieldSize      = new CIntField(type.getCIntegerField("_static_field_size"), Oop.getHeaderSize());
     staticOopFieldCount   = new CIntField(type.getCIntegerField("_static_oop_field_count"), Oop.getHeaderSize());
     nonstaticOopMapSize  = new CIntField(type.getCIntegerField("_nonstatic_oop_map_size"), Oop.getHeaderSize());
-    miscFlags            = new CIntField(type.getCIntegerField("_misc_flags"), Oop.getHeaderSize());
+    isMarkedDependent    = new CIntField(type.getCIntegerField("_is_marked_dependent"), Oop.getHeaderSize());
     initState            = new CIntField(type.getCIntegerField("_init_state"), Oop.getHeaderSize());
     vtableLen            = new CIntField(type.getCIntegerField("_vtable_len"), Oop.getHeaderSize());
     itableLen            = new CIntField(type.getCIntegerField("_itable_len"), Oop.getHeaderSize());
@@ -120,8 +118,6 @@
     CLASS_STATE_FULLY_INITIALIZED = db.lookupIntConstant("instanceKlass::fully_initialized").intValue();
     CLASS_STATE_INITIALIZATION_ERROR = db.lookupIntConstant("instanceKlass::initialization_error").intValue();
 
-    IS_MARKED_DEPENDENT_MASK = db.lookupIntConstant("instanceKlass::IS_MARKED_DEPENDENT").intValue();
-
   }
 
   InstanceKlass(OopHandle handle, ObjectHeap heap) {
@@ -155,7 +151,7 @@
   private static CIntField staticFieldSize;
   private static CIntField staticOopFieldCount;
   private static CIntField nonstaticOopMapSize;
-  private static CIntField miscFlags;
+  private static CIntField isMarkedDependent;
   private static CIntField initState;
   private static CIntField vtableLen;
   private static CIntField itableLen;
@@ -337,7 +333,7 @@
   public long      getNonstaticFieldSize()  { return                nonstaticFieldSize.getValue(this); }
   public long      getStaticOopFieldCount() { return                staticOopFieldCount.getValue(this); }
   public long      getNonstaticOopMapSize() { return                nonstaticOopMapSize.getValue(this); }
-  public boolean   getIsMarkedDependent()   { return                (miscFlags.getValue(this) & IS_MARKED_DEPENDENT_MASK) != 0; }
+  public boolean   getIsMarkedDependent()   { return                isMarkedDependent.getValue(this) != 0; }
   public long      getVtableLen()           { return                vtableLen.getValue(this); }
   public long      getItableLen()           { return                itableLen.getValue(this); }
   public Symbol    getGenericSignature()    { return getSymbol(genericSignature); }
@@ -528,7 +524,7 @@
       visitor.doCInt(staticFieldSize, true);
       visitor.doCInt(staticOopFieldCount, true);
       visitor.doCInt(nonstaticOopMapSize, true);
-      visitor.doCInt(miscFlags, true);
+      visitor.doCInt(isMarkedDependent, true);
       visitor.doCInt(initState, true);
       visitor.doCInt(vtableLen, true);
       visitor.doCInt(itableLen, true);
diff -r d7e3846464d0 -r 6520f9861937 make/hotspot_version
--- a/make/hotspot_version	Tue Jan 17 13:08:52 2012 -0500
+++ b/make/hotspot_version	Tue Jan 17 21:25:28 2012 -0500
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=10
+HS_BUILD_NUMBER=11
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
diff -r d7e3846464d0 -r 6520f9861937 src/cpu/sparc/vm/sparc.ad
--- a/src/cpu/sparc/vm/sparc.ad	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/cpu/sparc/vm/sparc.ad	Tue Jan 17 21:25:28 2012 -0500
@@ -9283,6 +9283,7 @@
 // (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
 instruct jumpXtnd(iRegX switch_val, o7RegI table) %{
   match(Jump switch_val);
+  effect(TEMP table);
 
   ins_cost(350);
 
@@ -10273,24 +10274,24 @@
 // ============================================================================
 // inlined locking and unlocking
 
-instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastLock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastLock object box));
 
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Lock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
 
 
-instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastUnlock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastUnlock object box));
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTUNLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
diff -r d7e3846464d0 -r 6520f9861937 src/cpu/x86/vm/x86_32.ad
--- a/src/cpu/x86/vm/x86_32.ad	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/cpu/x86/vm/x86_32.ad	Tue Jan 17 21:25:28 2012 -0500
@@ -13435,20 +13435,20 @@
 // inlined locking and unlocking
 
 
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
+instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
   match( Set cr (FastLock object box) );
-  effect( TEMP tmp, TEMP scr );
+  effect( TEMP tmp, TEMP scr, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
+  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode( Fast_Lock(object,box,tmp,scr) );
   ins_pipe( pipe_slow );
 %}
 
 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
   match( Set cr (FastUnlock object box) );
-  effect( TEMP tmp );
+  effect( TEMP tmp, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTUNLOCK $object, $box, $tmp" %}
+  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
   ins_encode( Fast_Unlock(object,box,tmp) );
   ins_pipe( pipe_slow );
 %}
diff -r d7e3846464d0 -r 6520f9861937 src/cpu/x86/vm/x86_64.ad
--- a/src/cpu/x86/vm/x86_64.ad	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/cpu/x86/vm/x86_64.ad	Tue Jan 17 21:25:28 2012 -0500
@@ -11511,13 +11511,13 @@
 // inlined locking and unlocking
 
 instruct cmpFastLock(rFlagsReg cr,
-                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
+                     rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
 %{
   match(Set cr (FastLock object box));
-  effect(TEMP tmp, TEMP scr);
+  effect(TEMP tmp, TEMP scr, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastlock $object,$box,$tmp,$scr" %}
+  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode(Fast_Lock(object, box, tmp, scr));
   ins_pipe(pipe_slow);
 %}
@@ -11526,10 +11526,10 @@
                        rRegP object, rax_RegP box, rRegP tmp)
 %{
   match(Set cr (FastUnlock object box));
-  effect(TEMP tmp);
+  effect(TEMP tmp, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastunlock $object, $box, $tmp" %}
+  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
   ins_encode(Fast_Unlock(object, box, tmp));
   ins_pipe(pipe_slow);
 %}
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/ci/ciTypeFlow.cpp
--- a/src/share/vm/ci/ciTypeFlow.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/ci/ciTypeFlow.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1589,7 +1589,7 @@
   _next = NULL;
   _on_work_list = false;
   _backedge_copy = false;
-  _exception_entry = false;
+  _has_monitorenter = false;
   _trap_bci = -1;
   _trap_index = 0;
   df_init();
@@ -2182,6 +2182,10 @@
         !head->is_clonable_exit(lp))
       continue;
 
+    // Avoid BoxLock merge.
+    if (EliminateNestedLocks && head->has_monitorenter())
+      continue;
+
     // check not already cloned
     if (head->backedge_copy_count() != 0)
       continue;
@@ -2322,6 +2326,10 @@
     // Watch for bailouts.
     if (failing())  return;
 
+    if (str.cur_bc() == Bytecodes::_monitorenter) {
+      block->set_has_monitorenter();
+    }
+
     if (res) {
 
       // We have encountered a trap.  Record it in this block.
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/ci/ciTypeFlow.hpp
--- a/src/share/vm/ci/ciTypeFlow.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/ci/ciTypeFlow.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -544,15 +544,19 @@
     // Has this block been cloned for a loop backedge?
     bool                             _backedge_copy;
 
+    // This block is entry to irreducible loop.
+    bool                             _irreducible_entry;
+
+    // This block has monitor entry point.
+    bool                             _has_monitorenter;
+
     // A pointer used for our internal work list
+    bool                             _on_work_list;      // on the work list
     Block*                           _next;
-    bool                             _on_work_list;      // on the work list
     Block*                           _rpo_next;          // Reverse post order list
 
     // Loop info
     Loop*                            _loop;              // nearest loop
-    bool                             _irreducible_entry; // entry to irreducible loop
-    bool                             _exception_entry;   // entry to exception handler
 
     ciBlock*     ciblock() const     { return _ciblock; }
     StateVector* state() const     { return _state; }
@@ -689,6 +693,8 @@
     bool   is_loop_head() const          { return _loop && _loop->head() == this; }
     void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
     bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    void   set_has_monitorenter()        { _has_monitorenter = true; }
+    bool   has_monitorenter() const      { return _has_monitorenter; }
     bool   is_visited() const            { return has_pre_order(); }
     bool   is_post_visited() const       { return has_post_order(); }
     bool   is_clonable_exit(Loop* lp);
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/code/dependencies.cpp
--- a/src/share/vm/code/dependencies.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/code/dependencies.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1631,7 +1631,7 @@
   for (ContextStream str(*this); str.next(); ) {
     klassOop d = str.klass();
     assert(!instanceKlass::cast(d)->is_marked_dependent(), "checking");
-    instanceKlass::cast(d)->set_is_marked_dependent();
+    instanceKlass::cast(d)->set_is_marked_dependent(true);
   }
 }
 
@@ -1640,7 +1640,7 @@
   // Unmark transitive interfaces
   for (ContextStream str(*this); str.next(); ) {
     klassOop d = str.klass();
-    instanceKlass::cast(d)->clear_is_marked_dependent();
+    instanceKlass::cast(d)->set_is_marked_dependent(false);
   }
 }
 
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/concurrentMark.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
@@ -183,12 +184,11 @@
 void CMMarkStack::allocate(size_t size) {
   _base = NEW_C_HEAP_ARRAY(oop, size);
   if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate "
-                                  "CM region mark stack");
+    vm_exit_during_initialization("Failed to allocate CM region mark stack");
   }
   _index = 0;
   _capacity = (jint) size;
-  _oops_do_bound = -1;
+  _saved_index = -1;
   NOT_PRODUCT(_max_depth = 0);
 }
 
@@ -283,7 +283,6 @@
   }
 }
 
-
 CMRegionStack::CMRegionStack() : _base(NULL) {}
 
 void CMRegionStack::allocate(size_t size) {
@@ -302,6 +301,8 @@
 }
 
 void CMRegionStack::push_lock_free(MemRegion mr) {
+  guarantee(false, "push_lock_free(): don't call this any more");
+
   assert(mr.word_size() > 0, "Precondition");
   while (true) {
     jint index = _index;
@@ -325,6 +326,8 @@
 // marking / remark phases. Should only be called in tandem with
 // other lock-free pops.
 MemRegion CMRegionStack::pop_lock_free() {
+  guarantee(false, "pop_lock_free(): don't call this any more");
+
   while (true) {
     jint index = _index;
 
@@ -390,6 +393,8 @@
 #endif
 
 bool CMRegionStack::invalidate_entries_into_cset() {
+  guarantee(false, "invalidate_entries_into_cset(): don't call this any more");
+
   bool result = false;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   for (int i = 0; i < _oops_do_bound; ++i) {
@@ -438,14 +443,29 @@
   return res;
 }
 
+void CMMarkStack::note_start_of_gc() {
+  assert(_saved_index == -1,
+         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
+  _saved_index = _index;
+}
+
+void CMMarkStack::note_end_of_gc() {
+  // This is intentionally a guarantee, instead of an assert. If we
+  // accidentally add something to the mark stack during GC, it
+  // will be a correctness issue so it's better if we crash. we'll
+  // only check this once per GC anyway, so it won't be a performance
+  // issue in any way.
+  guarantee(_saved_index == _index,
+            err_msg("saved index: %d index: %d", _saved_index, _index));
+  _saved_index = -1;
+}
+
 void CMMarkStack::oops_do(OopClosure* f) {
-  if (_index == 0) return;
-  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
-         "Bound must be set.");
-  for (int i = 0; i < _oops_do_bound; i++) {
+  assert(_saved_index == _index,
+         err_msg("saved index: %d index: %d", _saved_index, _index));
+  for (int i = 0; i < _index; i += 1) {
     f->do_oop(&_base[i]);
   }
-  _oops_do_bound = -1;
 }
 
 bool ConcurrentMark::not_yet_marked(oop obj) const {
@@ -783,7 +803,7 @@
 public:
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
-      r->note_start_of_marking(true);
+      r->note_start_of_marking();
     }
     return false;
   }
@@ -804,6 +824,10 @@
 
   // Initialise marking structures. This has to be done in a STW phase.
   reset();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
 }
 
 
@@ -818,10 +842,6 @@
   // every remark and we'll eventually not need to cause one.
   force_overflow_stw()->init();
 
-  // For each region note start of marking.
-  NoteStartOfMarkHRClosure startcl;
-  g1h->heap_region_iterate(&startcl);
-
   // Start Concurrent Marking weak-reference discovery.
   ReferenceProcessor* rp = g1h->ref_processor_cm();
   // enable ("weak") refs discovery
@@ -946,22 +966,9 @@
 }
 #endif // !PRODUCT
 
-void ConcurrentMark::grayRoot(oop p) {
-  HeapWord* addr = (HeapWord*) p;
-  // We can't really check against _heap_start and _heap_end, since it
-  // is possible during an evacuation pause with piggy-backed
-  // initial-mark that the committed space is expanded during the
-  // pause without CM observing this change. So the assertions below
-  // is a bit conservative; but better than nothing.
-  assert(_g1h->g1_committed().contains(addr),
-         "address should be within the heap bounds");
-
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    _nextMarkBitMap->parMark(addr);
-  }
-}
-
 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+  guarantee(false, "grayRegionIfNecessary(): don't call this any more");
+
   // The objects on the region have already been marked "in bulk" by
   // the caller. We only need to decide whether to push the region on
   // the region stack or not.
@@ -1007,6 +1014,8 @@
 }
 
 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+  guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");
+
   // The object is not marked by the caller. We need to at least mark
   // it and maybe push in on the stack.
 
@@ -1224,7 +1233,6 @@
                                        true /* expected_active */);
 
     if (VerifyDuringGC) {
-
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
@@ -1879,10 +1887,6 @@
   double end = os::elapsedTime();
   _cleanup_times.add((end - start) * 1000.0);
 
-  // G1CollectedHeap::heap()->print();
-  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
-  // G1CollectedHeap::heap()->get_gc_time_stamp());
-
   if (PrintGC || PrintGCDetails) {
     g1h->print_size_transition(gclog_or_tty,
                                start_used_bytes,
@@ -2669,6 +2673,8 @@
 }
 
 void ConcurrentMark::drainAllSATBBuffers() {
+  guarantee(false, "drainAllSATBBuffers(): don't call this any more");
+
   CMGlobalObjectClosure oc(this);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   satb_mq_set.set_closure(&oc);
@@ -2687,12 +2693,6 @@
   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
-void ConcurrentMark::markPrev(oop p) {
-  // Note we are overriding the read-only view of the prev map here, via
-  // the cast.
-  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
-}
-
 void ConcurrentMark::clear(oop p) {
   assert(p != NULL && p->is_oop(), "expected an oop");
   HeapWord* addr = (HeapWord*)p;
@@ -2702,13 +2702,21 @@
   _nextMarkBitMap->clear(addr);
 }
 
-void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+}
+
+void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
   _nextMarkBitMap->clearRange(mr);
 }
 
+void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
+  clearRangePrevBitmap(mr);
+  clearRangeNextBitmap(mr);
+}
+
 HeapRegion*
 ConcurrentMark::claim_region(int task_num) {
   // "checkpoint" the finger
@@ -2803,6 +2811,9 @@
 }
 
 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+  guarantee(false, "invalidate_aborted_regions_in_cset(): "
+                   "don't call this any more");
+
   bool result = false;
   for (int i = 0; i < (int)_max_task_num; ++i) {
     CMTask* the_task = _tasks[i];
@@ -2854,24 +2865,135 @@
     // ...then over the contents of the all the task queues.
     queue->oops_do(cl);
   }
-
-  // Invalidate any entries, that are in the region stack, that
-  // point into the collection set
-  if (_regionStack.invalidate_entries_into_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+}
+
+#ifndef PRODUCT
+enum VerifyNoCSetOopsPhase {
+  VerifyNoCSetOopsStack,
+  VerifyNoCSetOopsQueues,
+  VerifyNoCSetOopsSATBCompleted,
+  VerifyNoCSetOopsSATBThread
+};
+
+class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyNoCSetOopsPhase _phase;
+  int _info;
+
+  const char* phase_str() {
+    switch (_phase) {
+    case VerifyNoCSetOopsStack:         return "Stack";
+    case VerifyNoCSetOopsQueues:        return "Queue";
+    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
+    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
+    default:                            ShouldNotReachHere();
+    }
+    return NULL;
+  }
+
+  void do_object_work(oop obj) {
+    guarantee(!_g1h->obj_in_cs(obj),
+              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
+                      (void*) obj, phase_str(), _info));
+  }
+
+public:
+  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
+
+  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
+    _phase = phase;
+    _info = info;
+  }
+
+  virtual void do_oop(oop* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    do_object_work(obj);
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    // We should not come across narrow oops while scanning marking
+    // stacks and SATB buffers.
+    ShouldNotReachHere();
+  }
+
+  virtual void do_object(oop obj) {
+    do_object_work(obj);
+  }
+};
+
+void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
+                                         bool verify_enqueued_buffers,
+                                         bool verify_thread_buffers,
+                                         bool verify_fingers) {
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+  if (!G1CollectedHeap::heap()->mark_in_progress()) {
+    return;
   }
 
-  // Invalidate any aborted regions, recorded in the individual CM
-  // tasks, that point into the collection set.
-  if (invalidate_aborted_regions_in_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+  VerifyNoCSetOopsClosure cl;
+
+  if (verify_stacks) {
+    // Verify entries on the global mark stack
+    cl.set_phase(VerifyNoCSetOopsStack);
+    _markStack.oops_do(&cl);
+
+    // Verify entries on the task queues
+    for (int i = 0; i < (int) _max_task_num; i += 1) {
+      cl.set_phase(VerifyNoCSetOopsQueues, i);
+      OopTaskQueue* queue = _task_queues->queue(i);
+      queue->oops_do(&cl);
+    }
+  }
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+
+  // Verify entries on the enqueued SATB buffers
+  if (verify_enqueued_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
+    satb_qs.iterate_completed_buffers_read_only(&cl);
+  }
+
+  // Verify entries on the per-thread SATB buffers
+  if (verify_thread_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBThread);
+    satb_qs.iterate_thread_buffers_read_only(&cl);
   }
 
+  if (verify_fingers) {
+    // Verify the global finger
+    HeapWord* global_finger = finger();
+    if (global_finger != NULL && global_finger < _heap_end) {
+      // The global finger always points to a heap region boundary. We
+      // use heap_region_containing_raw() to get the containing region
+      // given that the global finger could be pointing to a free region
+      // which subsequently becomes continues humongous. If that
+      // happens, heap_region_containing() will return the bottom of the
+      // corresponding starts humongous region and the check below will
+      // not hold any more.
+      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
+      guarantee(global_finger == global_hr->bottom(),
+                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
+                        global_finger, HR_FORMAT_PARAMS(global_hr)));
+    }
+
+    // Verify the task fingers
+    assert(parallel_marking_threads() <= _max_task_num, "sanity");
+    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
+      CMTask* task = _tasks[i];
+      HeapWord* task_finger = task->finger();
+      if (task_finger != NULL && task_finger < _heap_end) {
+        // See above note on the global finger verification.
+        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
+        guarantee(task_finger == task_hr->bottom() ||
+                  !task_hr->in_collection_set(),
+                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
+                          task_finger, HR_FORMAT_PARAMS(task_hr)));
+      }
+    }
+  }
 }
+#endif // PRODUCT
 
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
@@ -3080,19 +3202,6 @@
   }
 };
 
-class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
-  jint _claim_value;
-
-public:
-  SetClaimValuesInCSetHRClosure(jint claim_value) :
-    _claim_value(claim_value) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    hr->set_claim_value(_claim_value);
-    return false;
-  }
-};
-
 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
@@ -3112,6 +3221,9 @@
 };
 
 void ConcurrentMark::complete_marking_in_collection_set() {
+  guarantee(false, "complete_marking_in_collection_set(): "
+                   "don't call this any more");
+
   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
 
   if (!g1h->mark_in_progress()) {
@@ -3135,9 +3247,8 @@
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
 
-  // Now reset the claim values in the regions in the collection set.
-  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
-  g1h->collection_set_iterate(&set_cv_cl);
+  // Reset the claim values in the regions in the collection set.
+  g1h->reset_cset_heap_region_claim_values();
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
 
@@ -3160,6 +3271,8 @@
 // newCSet().
 
 void ConcurrentMark::newCSet() {
+  guarantee(false, "newCSet(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) {
     // nothing to do if marking is not in progress
     return;
@@ -3198,6 +3311,8 @@
 }
 
 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  guarantee(false, "registerCSetRegion(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) return;
 
   HeapWord* region_end = hr->end();
@@ -3209,6 +3324,9 @@
 // Resets the region fields of active CMTasks whose values point
 // into the collection set.
 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
+  guarantee(false, "reset_active_task_region_fields_in_cset(): "
+                   "don't call this any more");
+
   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
   assert(parallel_marking_threads() <= _max_task_num, "sanity");
 
@@ -3919,6 +4037,10 @@
 }
 
 void CMTask::drain_region_stack(BitMapClosure* bc) {
+  assert(_cm->region_stack_empty(), "region stack should be empty");
+  assert(_aborted_region.is_empty(), "aborted region should be empty");
+  return;
+
   if (has_aborted()) return;
 
   assert(_region_finger == NULL,
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/concurrentMark.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,10 +166,10 @@
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
-  oop*   _base;      // bottom of stack
-  jint   _index;     // one more than last occupied index
-  jint   _capacity;  // max #elements
-  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  oop*   _base;        // bottom of stack
+  jint   _index;       // one more than last occupied index
+  jint   _capacity;    // max #elements
+  jint   _saved_index; // value of _index saved at start of GC
   NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
 
   bool   _overflow;
@@ -247,16 +247,12 @@
 
   void setEmpty()   { _index = 0; clear_overflow(); }
 
-  // Record the current size; a subsequent "oops_do" will iterate only over
-  // indices valid at the time of this call.
-  void set_oops_do_bound(jint bound = -1) {
-    if (bound == -1) {
-      _oops_do_bound = _index;
-    } else {
-      _oops_do_bound = bound;
-    }
-  }
-  jint oops_do_bound() { return _oops_do_bound; }
+  // Record the current index.
+  void note_start_of_gc();
+
+  // Make sure that we have not added any entries to the stack during GC.
+  void note_end_of_gc();
+
   // iterate over the oops in the mark stack, up to the bound recorded via
   // the call above.
   void oops_do(OopClosure* f);
@@ -724,10 +720,9 @@
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed and it's MT-safe. Currently, we just mark it. But, in the
-  // future, we can experiment with pushing it on the stack and we can
-  // do this without changing G1CollectedHeap.
-  void grayRoot(oop p);
+  // grayed. It is MT-safe.
+  inline void grayRoot(oop obj, size_t word_size);
+
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
   // marked any objects on that region. If _should_gray_objects is
@@ -735,6 +730,7 @@
   // pushed on the region stack, if it is located below the global
   // finger, otherwise we do nothing.
   void grayRegionIfNecessary(MemRegion mr);
+
   // It's used during evacuation pauses to mark and, if necessary,
   // gray a single object and it's MT-safe. It assumes the caller did
   // not mark the object. If _should_gray_objects is true and we're
@@ -791,24 +787,40 @@
 
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
-  void markPrev(oop p);
+  inline void markPrev(oop p);
+  inline void markNext(oop p);
   void clear(oop p);
-  // Clears marks for all objects in the given range, for both prev and
-  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
-  // this carefully!
-  void clearRangeBothMaps(MemRegion mr);
+  // Clears marks for all objects in the given range, for the prev,
+  // next, or both bitmaps.  NB: the previous bitmap is usually
+  // read-only, so use this carefully!
+  void clearRangePrevBitmap(MemRegion mr);
+  void clearRangeNextBitmap(MemRegion mr);
+  void clearRangeBothBitmaps(MemRegion mr);
 
-  // Record the current top of the mark and region stacks; a
-  // subsequent oops_do() on the mark stack and
-  // invalidate_entries_into_cset() on the region stack will iterate
-  // only over indices valid at the time of this call.
-  void set_oops_do_bound() {
-    _markStack.set_oops_do_bound();
-    _regionStack.set_oops_do_bound();
+  // Notify data structures that a GC has started.
+  void note_start_of_gc() {
+    _markStack.note_start_of_gc();
   }
+
+  // Notify data structures that a GC is finished.
+  void note_end_of_gc() {
+    _markStack.note_end_of_gc();
+  }
+
   // Iterate over the oops in the mark stack and all local queues. It
   // also calls invalidate_entries_into_cset() on the region stack.
   void oops_do(OopClosure* f);
+
+  // Verify that there are no CSet oops on the stacks (taskqueues /
+  // global mark stack), enqueued SATB buffers, per-thread SATB
+  // buffers, and fingers (global / per-task). The boolean parameters
+  // decide which of the above data structures to verify. If marking
+  // is not in progress, it's a no-op.
+  void verify_no_cset_oops(bool verify_stacks,
+                           bool verify_enqueued_buffers,
+                           bool verify_thread_buffers,
+                           bool verify_fingers) PRODUCT_RETURN;
+
   // It is called at the end of an evacuation pause during marking so
   // that CM is notified of where the new end of the heap is. It
   // doesn't do anything if concurrent_marking_in_progress() is false,
@@ -1166,6 +1178,7 @@
   // It keeps picking SATB buffers and processing them until no SATB
   // buffers are available.
   void drain_satb_buffers();
+
   // It keeps popping regions from the region stack and processing
   // them until the region stack is empty.
   void drain_region_stack(BitMapClosure* closure);
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -153,4 +153,46 @@
   }
 }
 
+inline void ConcurrentMark::markPrev(oop p) {
+  assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::markNext(oop p) {
+  assert(!_nextMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  _nextMarkBitMap->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size) {
+  HeapWord* addr = (HeapWord*) obj;
+
+  // Currently we don't do anything with word_size but we will use it
+  // in the very near future in the liveness calculation piggy-backing
+  // changes.
+
+#ifdef ASSERT
+  HeapRegion* hr = _g1h->heap_region_containing(addr);
+  assert(hr != NULL, "sanity");
+  assert(!hr->is_survivor(), "should not allocate survivors during IM");
+  assert(addr < hr->next_top_at_mark_start(),
+         err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
+                 addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
+  // We cannot assert that word_size == obj->size() given that obj
+  // might not be in a consistent state (another thread might be in
+  // the process of copying it). So the best thing we can do is to
+  // assert that word_size is under an upper bound which is its
+  // containing region's capacity.
+  assert(word_size * HeapWordSize <= hr->capacity(),
+         err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
+                 word_size * HeapWordSize, hr->capacity(),
+                 HR_FORMAT_PARAMS(hr)));
+#endif // ASSERT
+
+  if (!_nextMarkBitMap->isMarked(addr)) {
+    _nextMarkBitMap->parMark(addr);
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,11 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1EvacFailure.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -591,17 +593,29 @@
     }
     res = new_region_try_secondary_free_list();
   }
-  if (res == NULL && do_expand) {
+  if (res == NULL && do_expand && _expand_heap_after_alloc_failure) {
+    // Currently, only attempts to allocate GC alloc regions set
+    // do_expand to true. So, we should only reach here during a
+    // safepoint. If this assumption changes we might have to
+    // reconsider the use of _expand_heap_after_alloc_failure.
+    assert(SafepointSynchronize::is_at_safepoint(), "invariant");
+
     ergo_verbose1(ErgoHeapSizing,
                   "attempt heap expansion",
                   ergo_format_reason("region allocation request failed")
                   ergo_format_byte("allocation request"),
                   word_size * HeapWordSize);
     if (expand(word_size * HeapWordSize)) {
-      // Even though the heap was expanded, it might not have reached
-      // the desired size. So, we cannot assume that the allocation
-      // will succeed.
+      // Given that expand() succeeded in expanding the heap, and we
+      // always expand the heap by an amount aligned to the heap
+      // region size, the free list should in theory not be empty. So
+      // it would probably be OK to use remove_head(). But the extra
+      // check for NULL is unlikely to be a performance issue here (we
+      // just expanded the heap!) so let's just be conservative and
+      // use remove_head_or_null().
       res = _free_list.remove_head_or_null();
+    } else {
+      _expand_heap_after_alloc_failure = false;
     }
   }
   return res;
@@ -1838,6 +1852,7 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _retained_old_gc_alloc_region(NULL),
+  _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _full_collections_completed(0),
   _in_cset_fast_test(NULL),
@@ -2605,12 +2620,16 @@
   }
 };
 
-void
-G1CollectedHeap::reset_heap_region_claim_values() {
+void G1CollectedHeap::reset_heap_region_claim_values() {
   ResetClaimValuesClosure blk;
   heap_region_iterate(&blk);
 }
 
+void G1CollectedHeap::reset_cset_heap_region_claim_values() {
+  ResetClaimValuesClosure blk;
+  collection_set_iterate(&blk);
+}
+
 #ifdef ASSERT
 // This checks whether all regions in the heap have the correct claim
 // value. I also piggy-backed on this a check to ensure that the
@@ -3000,14 +3019,20 @@
       } else {
         VerifyObjsInRegionClosure not_dead_yet_cl(r, _vo);
         r->object_iterate(&not_dead_yet_cl);
-        if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
-          gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
-                                 "max_live_bytes "SIZE_FORMAT" "
-                                 "< calculated "SIZE_FORMAT,
-                                 r->bottom(), r->end(),
-                                 r->max_live_bytes(),
+        if (_vo != VerifyOption_G1UseNextMarking) {
+          if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+            gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+                                   "max_live_bytes "SIZE_FORMAT" "
+                                   "< calculated "SIZE_FORMAT,
+                                   r->bottom(), r->end(),
+                                   r->max_live_bytes(),
                                  not_dead_yet_cl.live_bytes());
-          _failures = true;
+            _failures = true;
+          }
+        } else {
+          // When vo == UseNextMarking we cannot currently do a sanity
+          // check on the live bytes as the calculation has not been
+          // finalized yet.
         }
       }
     }
@@ -3641,25 +3666,6 @@
         }
         perm_gen()->save_marks();
 
-        // We must do this before any possible evacuation that should propagate
-        // marks.
-        if (mark_in_progress()) {
-          double start_time_sec = os::elapsedTime();
-
-          _cm->drainAllSATBBuffers();
-          double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
-          g1_policy()->record_satb_drain_time(finish_mark_ms);
-        }
-        // Record the number of elements currently on the mark stack, so we
-        // only iterate over these.  (Since evacuation may add to the mark
-        // stack, doing more exposes race conditions.)  If no mark is in
-        // progress, this will be zero.
-        _cm->set_oops_do_bound();
-
-        if (mark_in_progress()) {
-          concurrent_mark()->newCSet();
-        }
-
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:");
         _young_list->print();
@@ -3668,6 +3674,16 @@
 
         g1_policy()->choose_collection_set(target_pause_time_ms);
 
+        _cm->note_start_of_gc();
+        // We should not verify the per-thread SATB buffers given that
+        // we have not filtered them yet (we'll do so during the
+        // GC). We also call this after choose_collection_set() to
+        // ensure that the CSet has been finalized.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 false /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         if (_hr_printer.is_active()) {
           HeapRegion* hr = g1_policy()->collection_set();
           while (hr != NULL) {
@@ -3684,16 +3700,6 @@
           }
         }
 
-        // We have chosen the complete collection set. If marking is
-        // active then, we clear the region fields of any of the
-        // concurrent marking tasks whose region fields point into
-        // the collection set as these values will become stale. This
-        // will cause the owning marking threads to claim a new region
-        // when marking restarts.
-        if (mark_in_progress()) {
-          concurrent_mark()->reset_active_task_region_fields_in_cset();
-        }
-
 #ifdef ASSERT
         VerifyCSetClosure cl;
         collection_set_iterate(&cl);
@@ -3707,6 +3713,16 @@
         // Actually do the work...
         evacuate_collection_set();
 
+        // We do this to mainly verify the per-thread SATB buffers
+        // (which have been filtered by now) since we didn't verify
+        // them earlier. No point in re-checking the stacks / enqueued
+        // buffers given that the CSet has not changed since last time
+        // we checked.
+        _cm->verify_no_cset_oops(false /* verify_stacks */,
+                                 false /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         free_collection_set(g1_policy()->collection_set());
         g1_policy()->clear_collection_set();
 
@@ -3775,6 +3791,8 @@
           size_t expand_bytes = g1_policy()->expansion_amount();
           if (expand_bytes > 0) {
             size_t bytes_before = capacity();
+            // No need for an ergo verbose message here,
+            // expansion_amount() does this when it returns a value > 0.
             if (!expand(expand_bytes)) {
               // We failed to expand the heap so let's verify that
               // committed/uncommitted amount match the backing store
@@ -3784,6 +3802,14 @@
           }
         }
 
+        // We redo the verificaiton but now wrt to the new CSet which
+        // has just got initialized after the previous CSet was freed.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+        _cm->note_end_of_gc();
+
         double end_time_sec = os::elapsedTime();
         double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
         g1_policy()->record_pause_time_ms(pause_time_ms);
@@ -3831,21 +3857,6 @@
         // CM reference discovery will be re-enabled if necessary.
       }
 
-      {
-        size_t expand_bytes = g1_policy()->expansion_amount();
-        if (expand_bytes > 0) {
-          size_t bytes_before = capacity();
-          // No need for an ergo verbose message here,
-          // expansion_amount() does this when it returns a value > 0.
-          if (!expand(expand_bytes)) {
-            // We failed to expand the heap so let's verify that
-            // committed/uncommitted amount match the backing store
-            assert(capacity() == _g1_storage.committed_size(), "committed size mismatch");
-            assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch");
-          }
-        }
-      }
-
       // We should do this after we potentially expand the heap so
       // that all the COMMIT events are generated before the end GC
       // event, and after we retire the GC alloc regions so that all
@@ -3949,6 +3960,8 @@
     // we allocate to in the region sets. We'll re-add it later, when
     // it's retired again.
     _old_set.remove(retained_region);
+    bool during_im = g1_policy()->during_initial_mark_pause();
+    retained_region->note_start_of_copying(during_im);
     _old_gc_alloc_region.set(retained_region);
     _hr_printer.reuse(retained_region);
   }
@@ -3985,157 +3998,26 @@
   _evac_failure_scan_stack = NULL;
 }
 
-class UpdateRSetDeferred : public OopsInHeapRegionClosure {
-private:
-  G1CollectedHeap* _g1;
-  DirtyCardQueue *_dcq;
-  CardTableModRefBS* _ct_bs;
-
-public:
-  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
-    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    assert(_from->is_in_reserved(p), "paranoia");
-    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
-        !_from->is_survivor()) {
-      size_t card_index = _ct_bs->index_for(p);
-      if (_ct_bs->mark_card_deferred(card_index)) {
-        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
-      }
-    }
-  }
-};
-
-class RemoveSelfPointerClosure: public ObjectClosure {
-private:
-  G1CollectedHeap* _g1;
-  ConcurrentMark* _cm;
-  HeapRegion* _hr;
-  size_t _prev_marked_bytes;
-  size_t _next_marked_bytes;
-  OopsInHeapRegionClosure *_cl;
-public:
-  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr,
-                           OopsInHeapRegionClosure* cl) :
-    _g1(g1), _hr(hr), _cm(_g1->concurrent_mark()),  _prev_marked_bytes(0),
-    _next_marked_bytes(0), _cl(cl) {}
-
-  size_t prev_marked_bytes() { return _prev_marked_bytes; }
-  size_t next_marked_bytes() { return _next_marked_bytes; }
-
-  // <original comment>
-  // The original idea here was to coalesce evacuated and dead objects.
-  // However that caused complications with the block offset table (BOT).
-  // In particular if there were two TLABs, one of them partially refined.
-  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
-  // The BOT entries of the unrefined part of TLAB_2 point to the start
-  // of TLAB_2. If the last object of the TLAB_1 and the first object
-  // of TLAB_2 are coalesced, then the cards of the unrefined part
-  // would point into middle of the filler object.
-  // The current approach is to not coalesce and leave the BOT contents intact.
-  // </original comment>
-  //
-  // We now reset the BOT when we start the object iteration over the
-  // region and refine its entries for every object we come across. So
-  // the above comment is not really relevant and we should be able
-  // to coalesce dead objects if we want to.
-  void do_object(oop obj) {
-    HeapWord* obj_addr = (HeapWord*) obj;
-    assert(_hr->is_in(obj_addr), "sanity");
-    size_t obj_size = obj->size();
-    _hr->update_bot_for_object(obj_addr, obj_size);
-    if (obj->is_forwarded() && obj->forwardee() == obj) {
-      // The object failed to move.
-      assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
-      _cm->markPrev(obj);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-      _prev_marked_bytes += (obj_size * HeapWordSize);
-      if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-      obj->set_mark(markOopDesc::prototype());
-      // While we were processing RSet buffers during the
-      // collection, we actually didn't scan any cards on the
-      // collection set, since we didn't want to update remebered
-      // sets with entries that point into the collection set, given
-      // that live objects fromthe collection set are about to move
-      // and such entries will be stale very soon. This change also
-      // dealt with a reliability issue which involved scanning a
-      // card in the collection set and coming across an array that
-      // was being chunked and looking malformed. The problem is
-      // that, if evacuation fails, we might have remembered set
-      // entries missing given that we skipped cards on the
-      // collection set. So, we'll recreate such entries now.
-      obj->oop_iterate(_cl);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-    } else {
-      // The object has been either evacuated or is dead. Fill it with a
-      // dummy object.
-      MemRegion mr((HeapWord*)obj, obj_size);
-      CollectedHeap::fill_with_object(mr);
-      _cm->clearRangeBothMaps(mr);
-    }
-  }
-};
-
 void G1CollectedHeap::remove_self_forwarding_pointers() {
-  UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
-  DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
-  UpdateRSetDeferred deferred_update(_g1h, &dcq);
-  OopsInHeapRegionClosure *cl;
-  if (G1DeferredRSUpdate) {
-    cl = &deferred_update;
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+  G1ParRemoveSelfForwardPtrsTask rsfp_task(this);
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    set_par_threads();
+    workers()->run_task(&rsfp_task);
+    set_par_threads(0);
   } else {
-    cl = &immediate_update;
-  }
-  HeapRegion* cur = g1_policy()->collection_set();
-  while (cur != NULL) {
-    assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
-    assert(!cur->isHumongous(), "sanity");
-
-    if (cur->evacuation_failed()) {
-      assert(cur->in_collection_set(), "bad CS");
-      RemoveSelfPointerClosure rspc(_g1h, cur, cl);
-
-      // In the common case we make sure that this is done when the
-      // region is freed so that it is "ready-to-go" when it's
-      // re-allocated. However, when evacuation failure happens, a
-      // region will remain in the heap and might ultimately be added
-      // to a CSet in the future. So we have to be careful here and
-      // make sure the region's RSet is ready for parallel iteration
-      // whenever this might be required in the future.
-      cur->rem_set()->reset_for_par_iteration();
-      cur->reset_bot();
-      cl->set_region(cur);
-      cur->object_iterate(&rspc);
-
-      // A number of manipulations to make the TAMS be the current top,
-      // and the marked bytes be the ones observed in the iteration.
-      if (_g1h->concurrent_mark()->at_least_one_mark_complete()) {
-        // The comments below are the postconditions achieved by the
-        // calls.  Note especially the last such condition, which says that
-        // the count of marked bytes has been properly restored.
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        cur->add_to_marked_bytes(rspc.prev_marked_bytes());
-        // _next_marked_bytes == prev_marked_bytes.
-        cur->note_end_of_marking();
-        // _prev_top_at_mark_start == top(),
-        // _prev_marked_bytes == prev_marked_bytes
-      }
-      // If there is no mark in progress, we modified the _next variables
-      // above needlessly, but harmlessly.
-      if (_g1h->mark_in_progress()) {
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        // _next_marked_bytes == next_marked_bytes.
-      }
-    }
-    cur = cur->next_in_collection_set();
-  }
+    rsfp_task.work(0);
+  }
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::ParEvacFailureClaimValue), "sanity");
+
+  // Reset the claim values in the regions in the collection set.
+  reset_cset_heap_region_claim_values();
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
   assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
   // Now restore saved marks, if any.
@@ -4148,6 +4030,7 @@
       markOop m = _preserved_marks_of_objs->at(i);
       obj->set_mark(m);
     }
+
     // Delete the preserved marks growable arrays (allocated on the C heap).
     delete _objs_with_preserved_marks;
     delete _preserved_marks_of_objs;
@@ -4172,8 +4055,7 @@
 
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
-                                               oop old,
-                                               bool should_mark_root) {
+                                               oop old) {
   assert(obj_in_cs(old),
          err_msg("obj: "PTR_FORMAT" should still be in the CSet",
                  (HeapWord*) old));
@@ -4182,15 +4064,6 @@
   if (forward_ptr == NULL) {
     // Forward-to-self succeeded.
 
-    // should_mark_root will be true when this routine is called
-    // from a root scanning closure during an initial mark pause.
-    // In this case the thread that succeeds in self-forwarding the
-    // object is also responsible for marking the object.
-    if (should_mark_root) {
-      assert(!oopDesc::is_null(old), "shouldn't be");
-      _cm->grayRoot(old);
-    }
-
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
       assert(!_drain_in_progress,
@@ -4286,30 +4159,8 @@
   return NULL;
 }
 
-#ifndef PRODUCT
-bool GCLabBitMapClosure::do_bit(size_t offset) {
-  HeapWord* addr = _bitmap->offsetToHeapWord(offset);
-  guarantee(_cm->isMarked(oop(addr)), "it should be!");
-  return true;
-}
-#endif // PRODUCT
-
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
-  ParGCAllocBuffer(gclab_word_size),
-  _should_mark_objects(false),
-  _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
-  _retired(false)
-{
-  //_should_mark_objects is set to true when G1ParCopyHelper needs to
-  // mark the forwarded location of an evacuated object.
-  // We set _should_mark_objects to true if marking is active, i.e. when we
-  // need to propagate a mark, or during an initial mark pause, i.e. when we
-  // need to mark objects immediately reachable by the roots.
-  if (G1CollectedHeap::heap()->mark_in_progress() ||
-      G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
-    _should_mark_objects = true;
-  }
-}
+  ParGCAllocBuffer(gclab_word_size), _retired(false) { }
 
 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
   : _g1h(g1h),
@@ -4323,8 +4174,7 @@
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     _age_table(false),
     _strong_roots_time(0), _term_time(0),
-    _alloc_buffer_waste(0), _undo_waste(0)
-{
+    _alloc_buffer_waste(0), _undo_waste(0) {
   // we allocate G1YoungSurvRateNumRegions plus one entries, since
   // we "sacrifice" entry 0 to keep track of surviving bytes for
   // non-young regions (where the age is -1)
@@ -4429,35 +4279,53 @@
   } while (!refs()->is_empty());
 }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
+                                     G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
-template <class T> void G1ParCopyHelper::mark_object(T* p) {
-  // This is called from do_oop_work for objects that are not
-  // in the collection set. Objects in the collection set
-  // are marked after they have been evacuated.
-
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop(heap_oop);
-    HeapWord* addr = (HeapWord*)obj;
-    if (_g1->is_in_g1_reserved(addr)) {
-      _cm->grayRoot(oop(addr));
-    }
-  }
-}
-
-oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,
-                                                     bool should_mark_copy) {
+void G1ParCopyHelper::mark_object(oop obj) {
+#ifdef ASSERT
+  HeapRegion* hr = _g1->heap_region_containing(obj);
+  assert(hr != NULL, "sanity");
+  assert(!hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // We know that the object is not moving so it's safe to read its size.
+  _cm->grayRoot(obj, (size_t) obj->size());
+}
+
+void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
+#ifdef ASSERT
+  assert(from_obj->is_forwarded(), "from obj should be forwarded");
+  assert(from_obj->forwardee() == to_obj, "to obj should be the forwardee");
+  assert(from_obj != to_obj, "should not be self-forwarded");
+
+  HeapRegion* from_hr = _g1->heap_region_containing(from_obj);
+  assert(from_hr != NULL, "sanity");
+  assert(from_hr->in_collection_set(), "from obj should be in the CSet");
+
+  HeapRegion* to_hr = _g1->heap_region_containing(to_obj);
+  assert(to_hr != NULL, "sanity");
+  assert(!to_hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // The object might be in the process of being copied by another
+  // worker so we cannot trust that its to-space image is
+  // well-formed. So we have to read its size from its from-space
+  // image which we know should not be changing.
+  _cm->grayRoot(to_obj, (size_t) from_obj->size());
+}
+
+oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
   size_t    word_sz = old->size();
   HeapRegion* from_region = _g1->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
   int       young_index = from_region->young_index_in_cset()+1;
-  assert( (from_region->is_young() && young_index > 0) ||
-          (!from_region->is_young() && young_index == 0), "invariant" );
+  assert( (from_region->is_young() && young_index >  0) ||
+         (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
   int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
@@ -4471,7 +4339,7 @@
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
     OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
+    return _g1->handle_evacuation_failure_par(cl, old);
   }
 
   // We're going to allocate linearly, so might as well prefetch ahead.
@@ -4507,28 +4375,14 @@
       obj->set_mark(m);
     }
 
-    // Mark the evacuated object or propagate "next" mark bit
-    if (should_mark_copy) {
-      if (!use_local_bitmaps ||
-          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
-        // if we couldn't mark it on the local bitmap (this happens when
-        // the object was not allocated in the GCLab), we have to bite
-        // the bullet and do the standard parallel mark
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-
-      if (_g1->isMarkedNext(old)) {
-        // Unmark the object's old location so that marking
-        // doesn't think the old object is alive.
-        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
-      }
-    }
-
     size_t* surv_young_words = _par_scan_state->surviving_young_words();
     surv_young_words[young_index] += word_sz;
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
-      arrayOop(old)->set_length(0);
+      // We keep track of the next start index in the length field of
+      // the to-space object. The actual length can be found in the
+      // length field of the from-space object.
+      arrayOop(obj)->set_length(0);
       oop* old_p = set_partial_array_mask(old);
       _par_scan_state->push_on_queue(old_p);
     } else {
@@ -4550,61 +4404,24 @@
 ::do_oop_work(T* p) {
   oop obj = oopDesc::load_decode_heap_oop(p);
   assert(barrier != G1BarrierRS || obj != NULL,
-         "Precondition: G1BarrierRS implies obj is nonNull");
-
-  // Marking:
-  // If the object is in the collection set, then the thread
-  // that copies the object should mark, or propagate the
-  // mark to, the evacuated object.
-  // If the object is not in the collection set then we
-  // should call the mark_object() method depending on the
-  // value of the template parameter do_mark_object (which will
-  // be true for root scanning closures during an initial mark
-  // pause).
-  // The mark_object() method first checks whether the object
-  // is marked and, if not, attempts to mark the object.
+         "Precondition: G1BarrierRS implies obj is non-NULL");
 
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
+    oop forwardee;
     if (obj->is_forwarded()) {
-      oopDesc::encode_store_heap_oop(p, obj->forwardee());
-      // If we are a root scanning closure during an initial
-      // mark pause (i.e. do_mark_object will be true) then
-      // we also need to handle marking of roots in the
-      // event of an evacuation failure. In the event of an
-      // evacuation failure, the object is forwarded to itself
-      // and not copied. For root-scanning closures, the
-      // object would be marked after a successful self-forward
-      // but an object could be pointed to by both a root and non
-      // root location and be self-forwarded by a non-root-scanning
-      // closure. Therefore we also have to attempt to mark the
-      // self-forwarded root object here.
-      if (do_mark_object && obj->forwardee() == obj) {
-        mark_object(p);
-      }
+      forwardee = obj->forwardee();
     } else {
-      // During an initial mark pause, objects that are pointed to
-      // by the roots need to be marked - even in the event of an
-      // evacuation failure. We pass the template parameter
-      // do_mark_object (which is true for root scanning closures
-      // during an initial mark pause) to copy_to_survivor_space
-      // which will pass it on to the evacuation failure handling
-      // code. The thread that successfully self-forwards a root
-      // object to itself is responsible for marking the object.
-      bool should_mark_root = do_mark_object;
-
-      // We need to mark the copied object if we're a root scanning
-      // closure during an initial mark pause (i.e. do_mark_object
-      // will be true), or the object is already marked and we need
-      // to propagate the mark to the evacuated copy.
-      bool should_mark_copy = do_mark_object ||
-                              _during_initial_mark ||
-                              (_mark_in_progress && !_g1->is_obj_ill(obj));
-
-      oop copy_oop = copy_to_survivor_space(obj, should_mark_root,
-                                                 should_mark_copy);
-      oopDesc::encode_store_heap_oop(p, copy_oop);
+      forwardee = copy_to_survivor_space(obj);
     }
+    assert(forwardee != NULL, "forwardee should not be NULL");
+    oopDesc::encode_store_heap_oop(p, forwardee);
+    if (do_mark_object && forwardee != obj) {
+      // If the object is self-forwarded we don't need to explicitly
+      // mark it, the evacuation failure protocol will do so.
+      mark_forwarded_object(obj, forwardee);
+    }
+
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
       _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
@@ -4613,8 +4430,8 @@
     // The object is not in collection set. If we're a root scanning
     // closure during an initial mark pause (i.e. do_mark_object will
     // be true) then attempt to mark the object.
-    if (do_mark_object) {
-      mark_object(p);
+    if (do_mark_object && _g1->is_in_g1_reserved(obj)) {
+      mark_object(obj);
     }
   }
 
@@ -4632,35 +4449,51 @@
 
 template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
   assert(has_partial_array_mask(p), "invariant");
-  oop old = clear_partial_array_mask(p);
-  assert(old->is_objArray(), "must be obj array");
-  assert(old->is_forwarded(), "must be forwarded");
-  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
-
-  objArrayOop obj = objArrayOop(old->forwardee());
-  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
-  // Process ParGCArrayScanChunk elements now
-  // and push the remainder back onto queue
-  int start     = arrayOop(old)->length();
-  int end       = obj->length();
-  int remainder = end - start;
-  assert(start <= end, "just checking");
+  oop from_obj = clear_partial_array_mask(p);
+
+  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
+  assert(from_obj->is_objArray(), "must be obj array");
+  objArrayOop from_obj_array = objArrayOop(from_obj);
+  // The from-space object contains the real length.
+  int length                 = from_obj_array->length();
+
+  assert(from_obj->is_forwarded(), "must be forwarded");
+  oop to_obj                 = from_obj->forwardee();
+  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
+  objArrayOop to_obj_array   = objArrayOop(to_obj);
+  // We keep track of the next start index in the length field of the
+  // to-space object.
+  int next_index             = to_obj_array->length();
+  assert(0 <= next_index && next_index < length,
+         err_msg("invariant, next index: %d, length: %d", next_index, length));
+
+  int start                  = next_index;
+  int end                    = length;
+  int remainder              = end - start;
+  // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
   if (remainder > 2 * ParGCArrayScanChunk) {
-    // Test above combines last partial chunk with a full chunk
     end = start + ParGCArrayScanChunk;
-    arrayOop(old)->set_length(end);
-    // Push remainder.
-    oop* old_p = set_partial_array_mask(old);
-    assert(arrayOop(old)->length() < obj->length(), "Empty push?");
-    _par_scan_state->push_on_queue(old_p);
+    to_obj_array->set_length(end);
+    // Push the remainder before we process the range in case another
+    // worker has run out of things to do and can steal it.
+    oop* from_obj_p = set_partial_array_mask(from_obj);
+    _par_scan_state->push_on_queue(from_obj_p);
   } else {
-    // Restore length so that the heap remains parsable in
-    // case of evacuation failure.
-    arrayOop(old)->set_length(end);
-  }
-  _scanner.set_region(_g1->heap_region_containing_raw(obj));
-  // process our set of indices (include header in first chunk)
-  obj->oop_iterate_range(&_scanner, start, end);
+    assert(length == end, "sanity");
+    // We'll process the final range for this object. Restore the length
+    // so that the heap remains parsable in case of evacuation failure.
+    to_obj_array->set_length(end);
+  }
+  _scanner.set_region(_g1->heap_region_containing_raw(to_obj));
+  // Process indexes [start,end). It will also process the header
+  // along with the first chunk (i.e., the chunk with start == 0).
+  // Note that at this point the length field of to_obj_array is not
+  // correct given that we are using it to keep track of the next
+  // start index. oop_iterate_range() (thankfully!) ignores the length
+  // field and only relies on the start / end parameters.  It does
+  // however return the size of the object which will be incorrect. So
+  // we have to ignore it even if we wanted to use it.
+  to_obj_array->oop_iterate_range(&_scanner, start, end);
 }
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
@@ -4893,12 +4726,16 @@
 
   g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
 
-  // Scan strong roots in mark stack.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
-    concurrent_mark()->oops_do(scan_non_heap_roots);
-  }
-  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+  // During conc marking we have to filter the per-thread SATB buffers
+  // to make sure we remove any oops into the CSet (which will show up
+  // as implicitly live).
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
+    if (mark_in_progress()) {
+      JavaThread::satb_mark_queue_set().filter_thread_buffers();
+    }
+  }
+  double satb_filtering_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
@@ -5439,6 +5276,7 @@
 }
 
 void G1CollectedHeap::evacuate_collection_set() {
+  _expand_heap_after_alloc_failure = true;
   set_evacuation_failed(false);
 
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
@@ -5516,13 +5354,6 @@
 
   finalize_for_evac_failure();
 
-  // Must do this before clearing the per-region evac-failure flags
-  // (which is currently done when we free the collection set).
-  // We also only do this if marking is actually in progress and so
-  // have to do this before we set the mark_in_progress flag at the
-  // end of an initial mark pause.
-  concurrent_mark()->complete_marking_in_collection_set();
-
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
     if (PrintGCDetails) {
@@ -6179,6 +6010,8 @@
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
       }
+      bool during_im = g1_policy()->during_initial_mark_pause();
+      new_alloc_region->note_start_of_copying(during_im);
       return new_alloc_region;
     } else {
       g1_policy()->note_alloc_region_limit_reached(ap);
@@ -6190,7 +6023,8 @@
 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
                                              GCAllocPurpose ap) {
-  alloc_region->note_end_of_copying();
+  bool during_im = g1_policy()->during_initial_mark_pause();
+  alloc_region->note_end_of_copying(during_im);
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
   if (ap == GCAllocForSurvived) {
     young_list()->add_survivor_region(alloc_region);
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -285,6 +285,14 @@
   // Typically, it is not full so we should re-use it during the next GC.
   HeapRegion* _retained_old_gc_alloc_region;
 
+  // It specifies whether we should attempt to expand the heap after a
+  // region allocation failure. If heap expansion fails we set this to
+  // false so that we don't re-attempt the heap expansion (it's likely
+  // that subsequent expansion attempts will also fail if one fails).
+  // Currently, it is only consulted during GC and it's reset at the
+  // start of each GC.
+  bool _expand_heap_after_alloc_failure;
+
   // It resets the mutator alloc region before new allocations can take place.
   void init_mutator_alloc_region();
 
@@ -861,8 +869,7 @@
   void finalize_for_evac_failure();
 
   // An attempt to evacuate "obj" has failed; take necessary steps.
-  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj,
-                                    bool should_mark_root);
+  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
   void handle_evacuation_failure_common(oop obj, markOop m);
 
   // ("Weak") Reference processing support.
@@ -954,7 +961,7 @@
   unsigned int* _worker_cset_start_region_time_stamp;
 
   enum G1H_process_strong_roots_tasks {
-    G1H_PS_mark_stack_oops_do,
+    G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
     G1H_PS_NumElements
@@ -1305,6 +1312,10 @@
   // It resets all the region claim values to the default.
   void reset_heap_region_claim_values();
 
+  // Resets the claim values of regions in the current
+  // collection set to the default.
+  void reset_cset_heap_region_claim_values();
+
 #ifdef ASSERT
   bool check_heap_region_claim_values(jint claim_value);
 
@@ -1740,10 +1751,8 @@
       _gclab_word_size(gclab_word_size),
       _real_start_word(NULL),
       _real_end_word(NULL),
-      _start_word(NULL)
-  {
-    guarantee( size_in_words() >= bitmap_size_in_words(),
-               "just making sure");
+      _start_word(NULL) {
+    guarantee(false, "GCLabBitMap::GCLabBitmap(): don't call this any more");
   }
 
   inline unsigned heapWordToOffset(HeapWord* addr) {
@@ -1797,6 +1806,8 @@
   }
 
   void set_buffer(HeapWord* start) {
+    guarantee(false, "set_buffer(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     clear();
 
@@ -1820,6 +1831,8 @@
 #endif // PRODUCT
 
   void retire() {
+    guarantee(false, "retire(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     assert(fields_well_formed(), "invariant");
 
@@ -1853,32 +1866,18 @@
 class G1ParGCAllocBuffer: public ParGCAllocBuffer {
 private:
   bool        _retired;
-  bool        _should_mark_objects;
-  GCLabBitMap _bitmap;
 
 public:
   G1ParGCAllocBuffer(size_t gclab_word_size);
 
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(_should_mark_objects, "invariant");
-    return _bitmap.mark(addr);
-  }
-
-  inline void set_buf(HeapWord* buf) {
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.set_buffer(buf);
-    }
+  void set_buf(HeapWord* buf) {
     ParGCAllocBuffer::set_buf(buf);
     _retired = false;
   }
 
-  inline void retire(bool end_of_gc, bool retain) {
+  void retire(bool end_of_gc, bool retain) {
     if (_retired)
       return;
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.retire();
-    }
     ParGCAllocBuffer::retire(end_of_gc, retain);
     _retired = true;
   }
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -281,7 +281,7 @@
 
   _par_last_gc_worker_start_times_ms = new double[_parallel_gc_threads];
   _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
-  _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_satb_filtering_times_ms = new double[_parallel_gc_threads];
 
   _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
   _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
@@ -905,10 +905,19 @@
     gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
-  // We only need to do this here as the policy will only be applied
-  // to the GC we're about to start. so, no point is calculating this
-  // every time we calculate / recalculate the target young length.
-  update_survivors_policy();
+  if (!during_initial_mark_pause()) {
+    // We only need to do this here as the policy will only be applied
+    // to the GC we're about to start. so, no point is calculating this
+    // every time we calculate / recalculate the target young length.
+    update_survivors_policy();
+  } else {
+    // The marking phase has a "we only copy implicitly live
+    // objects during marking" invariant. The easiest way to ensure it
+    // holds is not to allocate any survivor regions and tenure all
+    // objects. In the future we might change this and handle survivor
+    // regions specially during marking.
+    tenure_all_objects();
+  }
 
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
@@ -939,7 +948,7 @@
   for (int i = 0; i < _parallel_gc_threads; ++i) {
     _par_last_gc_worker_start_times_ms[i] = -1234.0;
     _par_last_ext_root_scan_times_ms[i] = -1234.0;
-    _par_last_mark_stack_scan_times_ms[i] = -1234.0;
+    _par_last_satb_filtering_times_ms[i] = -1234.0;
     _par_last_update_rs_times_ms[i] = -1234.0;
     _par_last_update_rs_processed_buffers[i] = -1234.0;
     _par_last_scan_rs_times_ms[i] = -1234.0;
@@ -1227,7 +1236,7 @@
   // of the PrintGCDetails output, in the non-parallel case.
 
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
   double update_rs_time = avg_value(_par_last_update_rs_times_ms);
   double update_rs_processed_buffers =
     sum_of_values(_par_last_update_rs_processed_buffers);
@@ -1236,7 +1245,7 @@
   double termination_time = avg_value(_par_last_termination_times_ms);
 
   double known_time = ext_root_scan_time +
-                      mark_stack_scan_time +
+                      satb_filtering_time +
                       update_rs_time +
                       scan_rs_time +
                       obj_copy_time;
@@ -1282,7 +1291,7 @@
     body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
 
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
-    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_satb_filtering_time_ms(satb_filtering_time);
     body_summary->record_update_rs_time_ms(update_rs_time);
     body_summary->record_scan_rs_time_ms(scan_rs_time);
     body_summary->record_obj_copy_time_ms(obj_copy_time);
@@ -1376,16 +1385,12 @@
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (print_marking_info) {
-      print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-    }
-
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
       print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
       if (print_marking_info) {
-        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+        print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
       }
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
       print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
@@ -1399,7 +1404,7 @@
         _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
 
         double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
-                                   _par_last_mark_stack_scan_times_ms[i] +
+                                   _par_last_satb_filtering_times_ms[i] +
                                    _par_last_update_rs_times_ms[i] +
                                    _par_last_scan_rs_times_ms[i] +
                                    _par_last_obj_copy_times_ms[i] +
@@ -1412,7 +1417,7 @@
     } else {
       print_stats(1, "Ext Root Scanning", ext_root_scan_time);
       if (print_marking_info) {
-        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+        print_stats(1, "SATB Filtering", satb_filtering_time);
       }
       print_stats(1, "Update RS", update_rs_time);
       print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
@@ -1983,11 +1988,10 @@
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
-      print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
       if (parallel) {
         print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
         print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(2, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(2, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(2, "Update RS", body_summary->get_update_rs_seq());
         print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
@@ -1996,7 +2000,7 @@
         {
           NumberSeq* other_parts[] = {
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq(),
@@ -2009,7 +2013,7 @@
         }
       } else {
         print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(1, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(1, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(1, "Update RS", body_summary->get_update_rs_seq());
         print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
@@ -2036,7 +2040,7 @@
             body_summary->get_satb_drain_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq()
           };
@@ -2433,9 +2437,6 @@
   assert(_inc_cset_build_state == Active, "Precondition");
   assert(!hr->is_young(), "non-incremental add of young region");
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->registerCSetRegion(hr);
-
   assert(!hr->in_collection_set(), "should not already be in the CSet");
   hr->set_in_collection_set(true);
   hr->set_next_in_collection_set(_collection_set);
@@ -2705,9 +2706,6 @@
   // Clear the fields that point to the survivor list - they are all young now.
   young_list->clear_survivors();
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
-
   _collection_set = _inc_cset_head;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,7 +67,7 @@
   define_num_seq(satb_drain) // optional
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
-    define_num_seq(mark_stack_scan)
+    define_num_seq(satb_filtering)
     define_num_seq(update_rs)
     define_num_seq(scan_rs)
     define_num_seq(obj_copy)
@@ -215,7 +215,7 @@
 
   double* _par_last_gc_worker_start_times_ms;
   double* _par_last_ext_root_scan_times_ms;
-  double* _par_last_mark_stack_scan_times_ms;
+  double* _par_last_satb_filtering_times_ms;
   double* _par_last_update_rs_times_ms;
   double* _par_last_update_rs_processed_buffers;
   double* _par_last_scan_rs_times_ms;
@@ -841,8 +841,8 @@
     _par_last_ext_root_scan_times_ms[worker_i] = ms;
   }
 
-  void record_mark_stack_scan_time(int worker_i, double ms) {
-    _par_last_mark_stack_scan_times_ms[worker_i] = ms;
+  void record_satb_filtering_time(int worker_i, double ms) {
+    _par_last_satb_filtering_times_ms[worker_i] = ms;
   }
 
   void record_satb_drain_time(double ms) {
@@ -1146,6 +1146,11 @@
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
+  void tenure_all_objects() {
+    _max_survivor_regions = 0;
+    _tenuring_threshold = 0;
+  }
+
   void record_survivor_regions(size_t      regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/g1EvacFailure.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+
+#include "gc_implementation/g1/concurrentMark.inline.hpp"
+#include "gc_implementation/g1/dirtyCardQueue.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1_globals.hpp"
+#include "gc_implementation/g1/g1OopClosures.inline.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "utilities/workgroup.hpp"
+
+// Closures and tasks associated with any self-forwarding pointers
+// installed as a result of an evacuation failure.
+
+class UpdateRSetDeferred : public OopsInHeapRegionClosure {
+private:
+  G1CollectedHeap* _g1;
+  DirtyCardQueue *_dcq;
+  CardTableModRefBS* _ct_bs;
+
+public:
+  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
+        !_from->is_survivor()) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+class RemoveSelfForwardPtrObjClosure: public ObjectClosure {
+private:
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  HeapRegion* _hr;
+  size_t _marked_bytes;
+  OopsInHeapRegionClosure *_update_rset_cl;
+  bool _during_initial_mark;
+  bool _during_conc_mark;
+public:
+  RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
+                                 HeapRegion* hr,
+                                 OopsInHeapRegionClosure* update_rset_cl,
+                                 bool during_initial_mark,
+                                 bool during_conc_mark) :
+    _g1(g1), _cm(cm), _hr(hr), _marked_bytes(0),
+    _update_rset_cl(update_rset_cl),
+    _during_initial_mark(during_initial_mark),
+    _during_conc_mark(during_conc_mark) { }
+
+  size_t marked_bytes() { return _marked_bytes; }
+
+  // <original comment>
+  // The original idea here was to coalesce evacuated and dead objects.
+  // However that caused complications with the block offset table (BOT).
+  // In particular if there were two TLABs, one of them partially refined.
+  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
+  // The BOT entries of the unrefined part of TLAB_2 point to the start
+  // of TLAB_2. If the last object of the TLAB_1 and the first object
+  // of TLAB_2 are coalesced, then the cards of the unrefined part
+  // would point into middle of the filler object.
+  // The current approach is to not coalesce and leave the BOT contents intact.
+  // </original comment>
+  //
+  // We now reset the BOT when we start the object iteration over the
+  // region and refine its entries for every object we come across. So
+  // the above comment is not really relevant and we should be able
+  // to coalesce dead objects if we want to.
+  void do_object(oop obj) {
+    HeapWord* obj_addr = (HeapWord*) obj;
+    assert(_hr->is_in(obj_addr), "sanity");
+    size_t obj_size = obj->size();
+    _hr->update_bot_for_object(obj_addr, obj_size);
+
+    if (obj->is_forwarded() && obj->forwardee() == obj) {
+      // The object failed to move.
+
+      // We consider all objects that we find self-forwarded to be
+      // live. What we'll do is that we'll update the prev marking
+      // info so that they are all under PTAMS and explicitly marked.
+      _cm->markPrev(obj);
+      if (_during_initial_mark) {
+        // For the next marking info we'll only mark the
+        // self-forwarded objects explicitly if we are during
+        // initial-mark (since, normally, we only mark objects pointed
+        // to by roots if we succeed in copying them). By marking all
+        // self-forwarded objects we ensure that we mark any that are
+        // still pointed to be roots. During concurrent marking, and
+        // after initial-mark, we don't need to mark any objects
+        // explicitly and all objects in the CSet are considered
+        // (implicitly) live. So, we won't mark them explicitly and
+        // we'll leave them over NTAMS.
+        _cm->markNext(obj);
+      }
+      _marked_bytes += (obj_size * HeapWordSize);
+      obj->set_mark(markOopDesc::prototype());
+
+      // While we were processing RSet buffers during the collection,
+      // we actually didn't scan any cards on the collection set,
+      // since we didn't want to update remembered sets with entries
+      // that point into the collection set, given that live objects
+      // from the collection set are about to move and such entries
+      // will be stale very soon.
+      // This change also dealt with a reliability issue which
+      // involved scanning a card in the collection set and coming
+      // across an array that was being chunked and looking malformed.
+      // The problem is that, if evacuation fails, we might have
+      // remembered set entries missing given that we skipped cards on
+      // the collection set. So, we'll recreate such entries now.
+      obj->oop_iterate(_update_rset_cl);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+    } else {
+      // The object has been either evacuated or is dead. Fill it with a
+      // dummy object.
+      MemRegion mr((HeapWord*) obj, obj_size);
+      CollectedHeap::fill_with_object(mr);
+    }
+  }
+};
+
+class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  OopsInHeapRegionClosure *_update_rset_cl;
+
+public:
+  RemoveSelfForwardPtrHRClosure(G1CollectedHeap* g1h,
+                                OopsInHeapRegionClosure* update_rset_cl) :
+    _g1h(g1h), _update_rset_cl(update_rset_cl),
+    _cm(_g1h->concurrent_mark()) { }
+
+  bool doHeapRegion(HeapRegion *hr) {
+    bool during_initial_mark = _g1h->g1_policy()->during_initial_mark_pause();
+    bool during_conc_mark = _g1h->mark_in_progress();
+
+    assert(!hr->isHumongous(), "sanity");
+    assert(hr->in_collection_set(), "bad CS");
+
+    if (hr->claimHeapRegion(HeapRegion::ParEvacFailureClaimValue)) {
+      if (hr->evacuation_failed()) {
+        RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl,
+                                            during_initial_mark,
+                                            during_conc_mark);
+
+        MemRegion mr(hr->bottom(), hr->end());
+        // We'll recreate the prev marking info so we'll first clear
+        // the prev bitmap range for this region. We never mark any
+        // CSet objects explicitly so the next bitmap range should be
+        // cleared anyway.
+        _cm->clearRangePrevBitmap(mr);
+
+        hr->note_self_forwarding_removal_start(during_initial_mark,
+                                               during_conc_mark);
+
+        // In the common case (i.e. when there is no evacuation
+        // failure) we make sure that the following is done when
+        // the region is freed so that it is "ready-to-go" when it's
+        // re-allocated. However, when evacuation failure happens, a
+        // region will remain in the heap and might ultimately be added
+        // to a CSet in the future. So we have to be careful here and
+        // make sure the region's RSet is ready for parallel iteration
+        // whenever this might be required in the future.
+        hr->rem_set()->reset_for_par_iteration();
+        hr->reset_bot();
+        _update_rset_cl->set_region(hr);
+        hr->object_iterate(&rspc);
+
+        hr->note_self_forwarding_removal_end(during_initial_mark,
+                                             during_conc_mark,
+                                             rspc.marked_bytes());
+      }
+    }
+    return false;
+  }
+};
+
+class G1ParRemoveSelfForwardPtrsTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+
+public:
+  G1ParRemoveSelfForwardPtrsTask(G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Remove Self-forwarding Pointers"),
+    _g1h(g1h) { }
+
+  void work(uint worker_id) {
+    UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
+    DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
+    UpdateRSetDeferred deferred_update(_g1h, &dcq);
+
+    OopsInHeapRegionClosure *update_rset_cl = &deferred_update;
+    if (!G1DeferredRSUpdate) {
+      update_rset_cl = &immediate_update;
+    }
+
+    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl);
+
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
+    _g1h->collection_set_iterate_from(hr, &rsfp_cl);
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/g1OopClosures.hpp
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,17 +121,25 @@
 class G1ParCopyHelper : public G1ParClosureSuper {
   G1ParScanClosure *_scanner;
 protected:
-  template <class T> void mark_object(T* p);
-  oop copy_to_survivor_space(oop obj, bool should_mark_root,
-                                      bool should_mark_copy);
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that are guaranteed not to move
+  // during the GC (i.e., non-CSet objects). It is MT-safe.
+  void mark_object(oop obj);
+
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that have been forwarded during a
+  // GC. It is MT-safe.
+  void mark_forwarded_object(oop from_obj, oop to_obj);
+
+  oop copy_to_survivor_space(oop obj);
+
 public:
   G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                   G1ParScanClosure *scanner) :
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
 
@@ -140,9 +148,8 @@
 public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                    ReferenceProcessor* rp) :
-    _scanner(g1, par_scan_state, rp),
-    G1ParCopyHelper(g1, par_scan_state, &_scanner)
-  {
+      _scanner(g1, par_scan_state, rp),
+      G1ParCopyHelper(g1, par_scan_state, &_scanner) {
     assert(_ref_processor == NULL, "sanity");
   }
 
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/g1_globals.hpp
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -295,7 +295,7 @@
           "Percentage (0-100) of the heap size to use as minimum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1DefaultMaxNewGenPercent, 50,                             \
+  develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
           "Percentage (0-100) of the heap size to use as maximum "          \
           "young gen size.")
 
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/heapRegion.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -575,6 +575,40 @@
   oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
 }
 
+void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
+                                                    bool during_conc_mark) {
+  // We always recreate the prev marking info and we'll explicitly
+  // mark all objects we find to be self-forwarded on the prev
+  // bitmap. So all objects need to be below PTAMS.
+  _prev_top_at_mark_start = top();
+  _prev_marked_bytes = 0;
+
+  if (during_initial_mark) {
+    // During initial-mark, we'll also explicitly mark all objects
+    // we find to be self-forwarded on the next bitmap. So all
+    // objects need to be below NTAMS.
+    _next_top_at_mark_start = top();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  } else if (during_conc_mark) {
+    // During concurrent mark, all objects in the CSet (including
+    // the ones we find to be self-forwarded) are implicitly live.
+    // So all objects need to be above NTAMS.
+    _next_top_at_mark_start = bottom();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  }
+}
+
+void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,
+                                                  bool during_conc_mark,
+                                                  size_t marked_bytes) {
+  assert(0 <= marked_bytes && marked_bytes <= used(),
+         err_msg("marked: "SIZE_FORMAT" used: "SIZE_FORMAT,
+                 marked_bytes, used()));
+  _prev_marked_bytes = marked_bytes;
+}
+
 HeapWord*
 HeapRegion::object_iterate_mem_careful(MemRegion mr,
                                                  ObjectClosure* cl) {
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/heapRegion.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -373,7 +373,8 @@
     ScrubRemSetClaimValue      = 3,
     ParVerifyClaimValue        = 4,
     RebuildRSClaimValue        = 5,
-    CompleteMarkCSetClaimValue = 6
+    CompleteMarkCSetClaimValue = 6,
+    ParEvacFailureClaimValue   = 7
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@@ -582,37 +583,33 @@
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
 
-  // Note the start of a marking phase. Record the
-  // start of the unmarked area of the region here.
-  void note_start_of_marking(bool during_initial_mark) {
-    init_top_at_conc_mark_count();
-    _next_marked_bytes = 0;
-    if (during_initial_mark && is_young() && !is_survivor())
-      _next_top_at_mark_start = bottom();
-    else
-      _next_top_at_mark_start = top();
-  }
+  // Notify the region that concurrent marking is starting. Initialize
+  // all fields related to the next marking info.
+  inline void note_start_of_marking();
+
+  // Notify the region that concurrent marking has finished. Copy the
+  // (now finalized) next marking info fields into the prev marking
+  // info fields.
+  inline void note_end_of_marking();
+
+  // Notify the region that it will be used as to-space during a GC
+  // and we are about to start copying objects into it.
+  inline void note_start_of_copying(bool during_initial_mark);
 
-  // Note the end of a marking phase. Install the start of
-  // the unmarked area that was captured at start of marking.
-  void note_end_of_marking() {
-    _prev_top_at_mark_start = _next_top_at_mark_start;
-    _prev_marked_bytes = _next_marked_bytes;
-    _next_marked_bytes = 0;
+  // Notify the region that it ceases being to-space during a GC and
+  // we will not copy objects into it any more.
+  inline void note_end_of_copying(bool during_initial_mark);
 
-    guarantee(_prev_marked_bytes <=
-              (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
-              "invariant");
-  }
+  // Notify the region that we are about to start processing
+  // self-forwarded objects during evac failure handling.
+  void note_self_forwarding_removal_start(bool during_initial_mark,
+                                          bool during_conc_mark);
 
-  // After an evacuation, we need to update _next_top_at_mark_start
-  // to be the current top.  Note this is only valid if we have only
-  // ever evacuated into this region.  If we evacuate, allocate, and
-  // then evacuate we are in deep doodoo.
-  void note_end_of_copying() {
-    assert(top() >= _next_top_at_mark_start, "Increase only");
-    _next_top_at_mark_start = top();
-  }
+  // Notify the region that we have finished processing self-forwarded
+  // objects during evac failure handling.
+  void note_self_forwarding_removal_end(bool during_initial_mark,
+                                        bool during_conc_mark,
+                                        size_t marked_bytes);
 
   // Returns "false" iff no object in the region was allocated when the
   // last mark phase ended.
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,4 +55,71 @@
   return _offsets.block_start_const(p);
 }
 
+inline void HeapRegion::note_start_of_marking() {
+  init_top_at_conc_mark_count();
+  _next_marked_bytes = 0;
+  _next_top_at_mark_start = top();
+}
+
+inline void HeapRegion::note_end_of_marking() {
+  _prev_top_at_mark_start = _next_top_at_mark_start;
+  _prev_marked_bytes = _next_marked_bytes;
+  _next_marked_bytes = 0;
+
+  assert(_prev_marked_bytes <=
+         (size_t) pointer_delta(prev_top_at_mark_start(), bottom()) *
+         HeapWordSize, "invariant");
+}
+
+inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // During initial-mark we'll explicitly mark any objects on old
+      // regions that are pointed to by roots. Given that explicit
+      // marks only make sense under NTAMS it'd be nice if we could
+      // check that condition if we wanted to. Given that we don't
+      // know where the top of this region will end up, we simply set
+      // NTAMS to the end of the region so all marks will be below
+      // NTAMS. We'll set it to the actual top when we retire this region.
+      _next_top_at_mark_start = end();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // We could have re-used this old region as to-space over a
+      // couple of GCs since the start of the concurrent marking
+      // cycle. This means that [bottom,NTAMS) will contain objects
+      // copied up to and including initial-mark and [NTAMS, top)
+      // will contain objects copied during the concurrent marking cycle.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
+inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(_next_top_at_mark_start == end(), "pre-condition");
+      _next_top_at_mark_start = top();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/ptrQueue.hpp
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +70,7 @@
   // given PtrQueueSet.
   PtrQueue(PtrQueueSet* qset, bool perm = false, bool active = false);
   // Release any contained resources.
-  void flush();
+  virtual void flush();
   // Calls flush() when destroyed.
   ~PtrQueue() { flush(); }
 
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/satbQueue.cpp
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,14 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 
+void ObjPtrQueue::flush() {
+  // The buffer might contain refs into the CSet. We have to filter it
+  // first before we flush it, otherwise we might end up with an
+  // enqueued buffer with refs into the CSet which breaks our invariants.
+  filter();
+  PtrQueue::flush();
+}
+
 // This method removes entries from an SATB buffer that will not be
 // useful to the concurrent marking threads. An entry is removed if it
 // satisfies one of the following conditions:
@@ -44,38 +52,27 @@
 //     process it again).
 //
 // The rest of the entries will be retained and are compacted towards
-// the top of the buffer. If with this filtering we clear a large
-// enough chunk of the buffer we can re-use it (instead of enqueueing
-// it) and we can just allow the mutator to carry on executing.
-
-bool ObjPtrQueue::should_enqueue_buffer() {
-  assert(_lock == NULL || _lock->owned_by_self(),
-         "we should have taken the lock before calling this");
+// the top of the buffer. Note that, because we do not allow old
+// regions in the CSet during marking, all objects on the CSet regions
+// are young (eden or survivors) and therefore implicitly live. So any
+// references into the CSet will be removed during filtering.
 
-  // A value of 0 means "don't filter SATB buffers".
-  if (G1SATBBufferEnqueueingThresholdPercent == 0) {
-    return true;
-  }
-
+void ObjPtrQueue::filter() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // This method should only be called if there is a non-NULL buffer
-  // that is full.
-  assert(_index == 0, "pre-condition");
-  assert(_buf != NULL, "pre-condition");
-
   void** buf = _buf;
   size_t sz = _sz;
 
+  if (buf == NULL) {
+    // nothing to do
+    return;
+  }
+
   // Used for sanity checking at the end of the loop.
   debug_only(size_t entries = 0; size_t retained = 0;)
 
   size_t i = sz;
   size_t new_index = sz;
 
-  // Given that we are expecting _index == 0, we could have changed
-  // the loop condition to (i > 0). But we are using _index for
-  // generality.
   while (i > _index) {
     assert(i > 0, "we should have at least one more entry to process");
     i -= oopSize;
@@ -103,22 +100,58 @@
       debug_only(retained += 1;)
     }
   }
+
+#ifdef ASSERT
   size_t entries_calc = (sz - _index) / oopSize;
   assert(entries == entries_calc, "the number of entries we counted "
          "should match the number of entries we calculated");
   size_t retained_calc = (sz - new_index) / oopSize;
   assert(retained == retained_calc, "the number of retained entries we counted "
          "should match the number of retained entries we calculated");
-  size_t perc = retained_calc * 100 / entries_calc;
+#endif // ASSERT
+
+  _index = new_index;
+}
+
+// This method will first apply the above filtering to the buffer. If
+// post-filtering a large enough chunk of the buffer has been cleared
+// we can re-use the buffer (instead of enqueueing it) and we can just
+// allow the mutator to carry on executing using the same buffer
+// instead of replacing it.
+
+bool ObjPtrQueue::should_enqueue_buffer() {
+  assert(_lock == NULL || _lock->owned_by_self(),
+         "we should have taken the lock before calling this");
+
+  // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to
+  // filter the buffer given that this will remove any references into
+  // the CSet as we currently assume that no such refs will appear in
+  // enqueued buffers.
+
+  // This method should only be called if there is a non-NULL buffer
+  // that is full.
+  assert(_index == 0, "pre-condition");
+  assert(_buf != NULL, "pre-condition");
+
+  filter();
+
+  size_t sz = _sz;
+  size_t all_entries = sz / oopSize;
+  size_t retained_entries = (sz - _index) / oopSize;
+  size_t perc = retained_entries * 100 / all_entries;
   bool should_enqueue = perc > (size_t) G1SATBBufferEnqueueingThresholdPercent;
-  _index = new_index;
-
   return should_enqueue;
 }
 
 void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
   if (_buf != NULL) {
     apply_closure_to_buffer(cl, _buf, _index, _sz);
+  }
+}
+
+void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
     _index = _sz;
   }
 }
@@ -135,6 +168,21 @@
   }
 }
 
+#ifndef PRODUCT
+// Helpful for debugging
+
+void ObjPtrQueue::print(const char* name) {
+  print(name, _buf, _index, _sz);
+}
+
+void ObjPtrQueue::print(const char* name,
+                        void** buf, size_t index, size_t sz) {
+  gclog_or_tty->print_cr("  SATB BUFFER [%s] buf: "PTR_FORMAT" "
+                         "index: "SIZE_FORMAT" sz: "SIZE_FORMAT,
+                         name, buf, index, sz);
+}
+#endif // PRODUCT
+
 #ifdef ASSERT
 void ObjPtrQueue::verify_oops_in_buffer() {
   if (_buf == NULL) return;
@@ -150,12 +198,9 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-
 SATBMarkQueueSet::SATBMarkQueueSet() :
-  PtrQueueSet(),
-  _closure(NULL), _par_closures(NULL),
-  _shared_satb_queue(this, true /*perm*/)
-{}
+  PtrQueueSet(), _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/) { }
 
 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                   int process_completed_threshold,
@@ -167,7 +212,6 @@
   }
 }
 
-
 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
   DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();)
   t->satb_mark_queue().handle_zero_index();
@@ -228,6 +272,13 @@
   }
 }
 
+void SATBMarkQueueSet::filter_thread_buffers() {
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().filter();
+  }
+  shared_satb_queue()->filter();
+}
+
 void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
   _closure = closure;
 }
@@ -239,9 +290,9 @@
 
 void SATBMarkQueueSet::iterate_closure_all_threads() {
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure(_closure);
+    t->satb_mark_queue().apply_closure_and_empty(_closure);
   }
-  shared_satb_queue()->apply_closure(_closure);
+  shared_satb_queue()->apply_closure_and_empty(_closure);
 }
 
 void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
@@ -250,7 +301,7 @@
 
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
     if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
     }
   }
 
@@ -264,7 +315,7 @@
 
   VMThread* vmt = VMThread::vm_thread();
   if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure(_par_closures[worker]);
+    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
   }
 }
 
@@ -292,6 +343,61 @@
   }
 }
 
+void SATBMarkQueueSet::iterate_completed_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  BufferNode* nd = _completed_buffers_head;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    nd = nd->next();
+  }
+}
+
+void SATBMarkQueueSet::iterate_thread_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(cl);
+  }
+  shared_satb_queue()->apply_closure(cl);
+}
+
+#ifndef PRODUCT
+// Helpful for debugging
+
+#define SATB_PRINTER_BUFFER_SIZE 256
+
+void SATBMarkQueueSet::print_all(const char* msg) {
+  char buffer[SATB_PRINTER_BUFFER_SIZE];
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+
+  gclog_or_tty->cr();
+  gclog_or_tty->print_cr("SATB BUFFERS [%s]", msg);
+
+  BufferNode* nd = _completed_buffers_head;
+  int i = 0;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Enqueued: %d", i);
+    ObjPtrQueue::print(buffer, buf, 0, _sz);
+    nd = nd->next();
+    i += 1;
+  }
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Thread: %s", t->name());
+    t->satb_mark_queue().print(buffer);
+  }
+
+  shared_satb_queue()->print("Shared");
+
+  gclog_or_tty->cr();
+}
+#endif // PRODUCT
+
 void SATBMarkQueueSet::abandon_partial_marking() {
   BufferNode* buffers_to_delete = NULL;
   {
@@ -316,5 +422,5 @@
   for (JavaThread* t = Threads::first(); t; t = t->next()) {
     t->satb_mark_queue().reset();
   }
-  shared_satb_queue()->reset();
+ shared_satb_queue()->reset();
 }
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/gc_implementation/g1/satbQueue.hpp
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,9 +29,26 @@
 
 class ObjectClosure;
 class JavaThread;
+class SATBMarkQueueSet;
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class SATBMarkQueueSet;
+
+private:
+  // Filter out unwanted entries from the buffer.
+  void filter();
+
+  // Apply the closure to all elements.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements and empty the buffer;
+  void apply_closure_and_empty(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
 public:
   ObjPtrQueue(PtrQueueSet* qset, bool perm = false) :
     // SATB queues are only active during marking cycles. We create
@@ -41,23 +58,23 @@
     // field to true. This is done in JavaThread::initialize_queues().
     PtrQueue(qset, perm, false /* active */) { }
 
+  // Overrides PtrQueue::flush() so that it can filter the buffer
+  // before it is flushed.
+  virtual void flush();
+
   // Overrides PtrQueue::should_enqueue_buffer(). See the method's
   // definition for more information.
   virtual bool should_enqueue_buffer();
 
-  // Apply the closure to all elements, and reset the index to make the
-  // buffer empty.
-  void apply_closure(ObjectClosure* cl);
-
-  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
-  static void apply_closure_to_buffer(ObjectClosure* cl,
-                                      void** buf, size_t index, size_t sz);
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print(const char* name);
+  static void print(const char* name, void** buf, size_t index, size_t sz);
+#endif // PRODUCT
 
   void verify_oops_in_buffer() NOT_DEBUG_RETURN;
 };
 
-
-
 class SATBMarkQueueSet: public PtrQueueSet {
   ObjectClosure* _closure;
   ObjectClosure** _par_closures;  // One per ParGCThread.
@@ -88,6 +105,9 @@
   // set itself, has an active value same as expected_active.
   void set_active_all_threads(bool b, bool expected_active);
 
+  // Filter all the currently-active SATB buffers.
+  void filter_thread_buffers();
+
   // Register "blk" as "the closure" for all queues.  Only one such closure
   // is allowed.  The "apply_closure_to_completed_buffer" method will apply
   // this closure to a completed buffer, and "iterate_closure_all_threads"
@@ -98,10 +118,9 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // If there is a registered closure for buffers, apply it to all entries
-  // in all currently-active buffers.  This should only be applied at a
-  // safepoint.  (Currently must not be called in parallel; this should
-  // change in the future.)
+  // Apply the registered closure to all entries on each
+  // currently-active buffer and then empty the buffer. It should only
+  // be called serially and at a safepoint.
   void iterate_closure_all_threads();
   // Parallel version of the above.
   void par_iterate_closure_all_threads(int worker);
@@ -117,11 +136,21 @@
     return apply_closure_to_completed_buffer_work(true, worker);
   }
 
+  // Apply the given closure on enqueued and currently-active buffers
+  // respectively. Both methods are read-only, i.e., they do not
+  // modify any of the buffers.
+  void iterate_completed_buffers_read_only(ObjectClosure* cl);
+  void iterate_thread_buffers_read_only(ObjectClosure* cl);
+
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print_all(const char* msg);
+#endif // PRODUCT
+
   ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
 
   // If a marking is being abandoned, reset any unprocessed log buffers.
   void abandon_partial_marking();
-
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/oops/instanceKlass.hpp
--- a/src/share/vm/oops/instanceKlass.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/oops/instanceKlass.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -231,6 +231,10 @@
   u2              _java_fields_count;    // The number of declared Java fields
   int             _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks
 
+  bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
+  bool            _rewritten;            // methods rewritten.
+  bool            _has_nonstatic_fields; // for sizing with UseCompressedOops
+  bool            _should_verify_class;  // allow caching of preverification
   u2              _minor_version;        // minor version number of class file
   u2              _major_version;        // major version number of class file
   Thread*         _init_thread;          // Pointer to current thread doing initialization (to handle recusive initialization)
@@ -261,19 +265,6 @@
   // _idnum_allocated_count.
   u1              _init_state;                    // state of class
 
-  // Compact the following four boolean flags into 1-bit each.  These four flags
-  // were defined as separate boolean fields and each was 1-byte before. Since
-  // there are 2 bytes unused after the _idnum_allocated_count field, place the
-  // _misc_flags field after _idnum_allocated_count to utilize the unused bits
-  // and save total 4-bytes.
-  enum {
-    IS_MARKED_DEPENDENT  = 0x1, // used for marking during flushing and deoptimization
-    REWRITTEN            = 0x2, // methods rewritten.
-    HAS_NONSTATIC_FIELDS = 0x4, // for sizing with UseCompressedOops
-    SHOULD_VERIFY_CLASS  = 0x8  // allow caching of preverification
-  };
-  u1              _misc_flags;
-
   // embedded Java vtable follows here
   // embedded Java itables follows here
   // embedded static fields follows here
@@ -283,14 +274,8 @@
   friend class SystemDictionary;
 
  public:
-  bool has_nonstatic_fields() const        { return (_misc_flags & HAS_NONSTATIC_FIELDS) != 0; }
-  void set_has_nonstatic_fields(bool b) {
-    if (b) {
-      _misc_flags |= HAS_NONSTATIC_FIELDS;
-    } else {
-      _misc_flags &= ~HAS_NONSTATIC_FIELDS;
-    }
-  }
+  bool has_nonstatic_fields() const        { return _has_nonstatic_fields; }
+  void set_has_nonstatic_fields(bool b)    { _has_nonstatic_fields = b; }
 
   // field sizes
   int nonstatic_field_size() const         { return _nonstatic_field_size; }
@@ -398,23 +383,15 @@
   bool is_in_error_state() const           { return _init_state == initialization_error; }
   bool is_reentrant_initialization(Thread *thread)  { return thread == _init_thread; }
   ClassState  init_state()                 { return (ClassState)_init_state; }
-  bool is_rewritten() const                { return (_misc_flags & REWRITTEN) != 0; }
+  bool is_rewritten() const                { return _rewritten; }
 
   // defineClass specified verification
-  bool should_verify_class() const         { return (_misc_flags & SHOULD_VERIFY_CLASS) != 0; }
-  void set_should_verify_class(bool value) {
-    if (value) {
-      _misc_flags |= SHOULD_VERIFY_CLASS;
-    } else {
-      _misc_flags &= ~SHOULD_VERIFY_CLASS;
-    }
-  }
-
+  bool should_verify_class() const         { return _should_verify_class; }
+  void set_should_verify_class(bool value) { _should_verify_class = value; }
 
   // marking
-  bool is_marked_dependent() const         { return (_misc_flags & IS_MARKED_DEPENDENT) != 0; }
-  void set_is_marked_dependent()           { _misc_flags |= IS_MARKED_DEPENDENT; }
-  void clear_is_marked_dependent()         { _misc_flags &= ~IS_MARKED_DEPENDENT; }
+  bool is_marked_dependent() const         { return _is_marked_dependent; }
+  void set_is_marked_dependent(bool value) { _is_marked_dependent = value; }
 
   // initialization (virtuals from Klass)
   bool should_be_initialized() const;  // means that initialize should be called
@@ -784,7 +761,7 @@
 #else
   void set_init_state(ClassState state) { _init_state = (u1)state; }
 #endif
-  void set_rewritten()                  { _misc_flags |= REWRITTEN; }
+  void set_rewritten()                  { _rewritten = true; }
   void set_init_thread(Thread *thread)  { _init_thread = thread; }
 
   u2 idnum_allocated_count() const      { return _idnum_allocated_count; }
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/oops/instanceKlassKlass.cpp
--- a/src/share/vm/oops/instanceKlassKlass.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/oops/instanceKlassKlass.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -399,7 +399,7 @@
     ik->set_inner_classes(NULL);
     ik->set_static_oop_field_count(0);
     ik->set_nonstatic_field_size(0);
-    ik->clear_is_marked_dependent();
+    ik->set_is_marked_dependent(false);
     ik->set_init_state(instanceKlass::allocated);
     ik->set_init_thread(NULL);
     ik->set_reference_type(rt);
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/c2_globals.hpp
--- a/src/share/vm/opto/c2_globals.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/c2_globals.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -426,6 +426,9 @@
   product(bool, EliminateLocks, true,                                       \
           "Coarsen locks when possible")                                    \
                                                                             \
+  product(bool, EliminateNestedLocks, true,                                 \
+          "Eliminate nested locks of the same object when possible")        \
+                                                                            \
   notproduct(bool, PrintLockStatistics, false,                              \
           "Print precise statistics on the dynamic lock usage")             \
                                                                             \
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/callnode.cpp
--- a/src/share/vm/opto/callnode.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/callnode.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -400,10 +400,10 @@
       Node *box = mcall->monitor_box(this, i);
       Node *obj = mcall->monitor_obj(this, i);
       if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) {
-        while( !box->is_BoxLock() )  box = box->in(1);
+        box = BoxLockNode::box_node(box);
         format_helper( regalloc, st, box, "MON-BOX[", i, &scobjs );
       } else {
-        OptoReg::Name box_reg = BoxLockNode::stack_slot(box);
+        OptoReg::Name box_reg = BoxLockNode::reg(box);
         st->print(" MON-BOX%d=%s+%d",
                    i,
                    OptoReg::regname(OptoReg::c_frame_pointer),
@@ -411,8 +411,7 @@
       }
       const char* obj_msg = "MON-OBJ[";
       if (EliminateLocks) {
-        while( !box->is_BoxLock() )  box = box->in(1);
-        if (box->as_BoxLock()->is_eliminated())
+        if (BoxLockNode::box_node(box)->is_eliminated())
           obj_msg = "MON-OBJ(LOCK ELIMINATED)[";
       }
       format_helper( regalloc, st, obj, obj_msg, i, &scobjs );
@@ -1387,8 +1386,9 @@
     Node *n = ctrl_proj->in(0);
     if (n != NULL && n->is_Unlock()) {
       UnlockNode *unlock = n->as_Unlock();
-      if ((lock->obj_node() == unlock->obj_node()) &&
-          (lock->box_node() == unlock->box_node()) && !unlock->is_eliminated()) {
+      if (lock->obj_node()->eqv_uncast(unlock->obj_node()) &&
+          BoxLockNode::same_slot(lock->box_node(), unlock->box_node()) &&
+          !unlock->is_eliminated()) {
         lock_ops.append(unlock);
         return true;
       }
@@ -1431,8 +1431,8 @@
   }
   if (ctrl->is_Lock()) {
     LockNode *lock = ctrl->as_Lock();
-    if ((lock->obj_node() == unlock->obj_node()) &&
-            (lock->box_node() == unlock->box_node())) {
+    if (lock->obj_node()->eqv_uncast(unlock->obj_node()) &&
+        BoxLockNode::same_slot(lock->box_node(), unlock->box_node())) {
       lock_result = lock;
     }
   }
@@ -1462,8 +1462,9 @@
       }
       if (lock1_node != NULL && lock1_node->is_Lock()) {
         LockNode *lock1 = lock1_node->as_Lock();
-        if ((lock->obj_node() == lock1->obj_node()) &&
-            (lock->box_node() == lock1->box_node()) && !lock1->is_eliminated()) {
+        if (lock->obj_node()->eqv_uncast(lock1->obj_node()) &&
+            BoxLockNode::same_slot(lock->box_node(), lock1->box_node()) &&
+            !lock1->is_eliminated()) {
           lock_ops.append(lock1);
           return true;
         }
@@ -1507,19 +1508,16 @@
 void AbstractLockNode::create_lock_counter(JVMState* state) {
   _counter = OptoRuntime::new_named_counter(state, NamedCounter::LockCounter);
 }
-#endif
 
-void AbstractLockNode::set_eliminated() {
-  _eliminate = true;
-#ifndef PRODUCT
+void AbstractLockNode::set_eliminated_lock_counter() {
   if (_counter) {
     // Update the counter to indicate that this lock was eliminated.
     // The counter update code will stay around even though the
     // optimizer will eliminate the lock operation itself.
     _counter->set_tag(NamedCounter::EliminatedLockCounter);
   }
+}
 #endif
-}
 
 //=============================================================================
 Node *LockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
@@ -1535,7 +1533,7 @@
   // prevents macro expansion from expanding the lock.  Since we don't
   // modify the graph, the value returned from this function is the
   // one computed above.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are locking an unescaped object, the lock/unlock is unnecessary
     //
@@ -1544,16 +1542,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace coarsened flag
+      // to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
       return result;
     }
 
@@ -1613,8 +1606,7 @@
         for (int i = 0; i < lock_ops.length(); i++) {
           AbstractLockNode* lock = lock_ops.at(i);
 
-          // Mark it eliminated to update any counters
-          lock->set_eliminated();
+          // Mark it eliminated by coarsening and update any counters
           lock->set_coarsened();
         }
       } else if (ctrl->is_Region() &&
@@ -1632,6 +1624,40 @@
 }
 
 //=============================================================================
+bool LockNode::is_nested_lock_region() {
+  BoxLockNode* box = box_node()->as_BoxLock();
+  int stk_slot = box->stack_slot();
+  if (stk_slot <= 0)
+    return false; // External lock or it is not Box (Phi node).
+
+  // Ignore complex cases: merged locks or multiple locks.
+  Node* obj = obj_node();
+  LockNode* unique_lock = NULL;
+  if (!box->is_simple_lock_region(&unique_lock, obj) ||
+      (unique_lock != this)) {
+    return false;
+  }
+
+  // Look for external lock for the same object.
+  SafePointNode* sfn = this->as_SafePoint();
+  JVMState* youngest_jvms = sfn->jvms();
+  int max_depth = youngest_jvms->depth();
+  for (int depth = 1; depth <= max_depth; depth++) {
+    JVMState* jvms = youngest_jvms->of_depth(depth);
+    int num_mon  = jvms->nof_monitors();
+    // Loop over monitors
+    for (int idx = 0; idx < num_mon; idx++) {
+      Node* obj_node = sfn->monitor_obj(jvms, idx);
+      BoxLockNode* box_node = sfn->monitor_box(jvms, idx)->as_BoxLock();
+      if ((box_node->stack_slot() < stk_slot) && obj_node->eqv_uncast(obj)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+//=============================================================================
 uint UnlockNode::size_of() const { return sizeof(*this); }
 
 //=============================================================================
@@ -1649,7 +1675,7 @@
   // modify the graph, the value returned from this function is the
   // one computed above.
   // Escape state is defined after Parse phase.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
     //
@@ -1658,16 +1684,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace coarsened flag
+      // to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
     }
   }
   return result;
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/callnode.hpp
--- a/src/share/vm/opto/callnode.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/callnode.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -840,8 +840,12 @@
 //------------------------------AbstractLockNode-----------------------------------
 class AbstractLockNode: public CallNode {
 private:
-  bool _eliminate;    // indicates this lock can be safely eliminated
-  bool _coarsened;    // indicates this lock was coarsened
+  enum {
+    Regular = 0,  // Normal lock
+    NonEscObj,    // Lock is used for non escaping object
+    Coarsened,    // Lock was coarsened
+    Nested        // Nested lock
+  } _kind;
 #ifndef PRODUCT
   NamedCounter* _counter;
 #endif
@@ -858,12 +862,13 @@
                                GrowableArray<AbstractLockNode*> &lock_ops);
   LockNode *find_matching_lock(UnlockNode* unlock);
 
+  // Update the counter to indicate that this lock was eliminated.
+  void set_eliminated_lock_counter() PRODUCT_RETURN;
 
 public:
   AbstractLockNode(const TypeFunc *tf)
     : CallNode(tf, NULL, TypeRawPtr::BOTTOM),
-      _coarsened(false),
-      _eliminate(false)
+      _kind(Regular)
   {
 #ifndef PRODUCT
     _counter = NULL;
@@ -873,20 +878,23 @@
   Node *   obj_node() const       {return in(TypeFunc::Parms + 0); }
   Node *   box_node() const       {return in(TypeFunc::Parms + 1); }
   Node *   fastlock_node() const  {return in(TypeFunc::Parms + 2); }
+  void     set_box_node(Node* box) { set_req(TypeFunc::Parms + 1, box); }
+
   const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
 
   virtual uint size_of() const { return sizeof(*this); }
 
-  bool is_eliminated()         {return _eliminate; }
-  // mark node as eliminated and update the counter if there is one
-  void set_eliminated();
+  bool is_eliminated()  const { return (_kind != Regular); }
+  bool is_non_esc_obj() const { return (_kind == NonEscObj); }
+  bool is_coarsened()   const { return (_kind == Coarsened); }
+  bool is_nested()      const { return (_kind == Nested); }
 
-  bool is_coarsened()  { return _coarsened; }
-  void set_coarsened() { _coarsened = true; }
-  void clear_coarsened() { _coarsened = false; }
+  void set_non_esc_obj() { _kind = NonEscObj; set_eliminated_lock_counter(); }
+  void set_coarsened()   { _kind = Coarsened; set_eliminated_lock_counter(); }
+  void set_nested()      { _kind = Nested; set_eliminated_lock_counter(); }
 
   // locking does not modify its arguments
-  virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
+  virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
 
 #ifndef PRODUCT
   void create_lock_counter(JVMState* s);
@@ -936,6 +944,8 @@
   virtual void  clone_jvms() {
     set_jvms(jvms()->clone_deep(Compile::current()));
   }
+
+  bool is_nested_lock_region(); // Is this Lock nested?
 };
 
 //------------------------------Unlock---------------------------------------
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/cfgnode.cpp
--- a/src/share/vm/opto/cfgnode.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/cfgnode.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1597,7 +1597,7 @@
       bool is_loop = (r->is_Loop() && r->req() == 3);
       // Then, check if there is a data loop when phi references itself directly
       // or through other data nodes.
-      if (is_loop && !phase->eqv_uncast(uin, in(LoopNode::EntryControl)) ||
+      if (is_loop && !uin->eqv_uncast(in(LoopNode::EntryControl)) ||
          !is_loop && is_unsafe_data_reference(uin)) {
         // Break this data loop to avoid creation of a dead loop.
         if (can_reshape) {
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/chaitin.hpp
--- a/src/share/vm/opto/chaitin.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/chaitin.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -485,7 +485,11 @@
     return yank_if_dead(old, current_block, &value, &regnd);
   }
 
-  int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
+  int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
+    return yank_if_dead_recurse(old, old, current_block, value, regnd);
+  }
+  int yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
+                           Node_List *value, Node_List *regnd);
   int yank( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
   int elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs );
   int use_prior_register( Node *copy, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd );
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/escape.cpp
--- a/src/share/vm/opto/escape.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/escape.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1842,20 +1842,15 @@
       Node *n = C->macro_node(i);
       if (n->is_AbstractLock()) { // Lock and Unlock nodes
         AbstractLockNode* alock = n->as_AbstractLock();
-        if (!alock->is_eliminated() || alock->is_coarsened()) {
+        if (!alock->is_non_esc_obj()) {
           PointsToNode::EscapeState es = escape_state(alock->obj_node());
           assert(es != PointsToNode::UnknownEscape, "should know");
           if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-            if (!alock->is_eliminated()) {
-              // Mark it eliminated to update any counters
-              alock->set_eliminated();
-            } else {
-              // The lock could be marked eliminated by lock coarsening
-              // code during first IGVN before EA. Clear coarsened flag
-              // to eliminate all associated locks/unlocks and relock
-              // during deoptimization.
-              alock->clear_coarsened();
-            }
+            assert(!alock->is_eliminated() || alock->is_coarsened(), "sanity");
+            // The lock could be marked eliminated by lock coarsening
+            // code during first IGVN before EA. Replace coarsened flag
+            // to eliminate all associated locks/unlocks.
+            alock->set_non_esc_obj();
           }
         }
       }
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/library_call.cpp
--- a/src/share/vm/opto/library_call.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/library_call.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -819,7 +819,7 @@
   if (stopped())
     return NULL;                // already stopped
   bool zero_offset = _gvn.type(offset) == TypeInt::ZERO;
-  if (zero_offset && _gvn.eqv_uncast(subseq_length, array_length))
+  if (zero_offset && subseq_length->eqv_uncast(array_length))
     return NULL;                // common case of whole-array copy
   Node* last = subseq_length;
   if (!zero_offset)             // last += offset
@@ -4667,7 +4667,7 @@
   if (ReduceBulkZeroing
       && !ZeroTLAB              // pointless if already zeroed
       && basic_elem_type != T_CONFLICT // avoid corner case
-      && !_gvn.eqv_uncast(src, dest)
+      && !src->eqv_uncast(dest)
       && ((alloc = tightly_coupled_allocation(dest, slow_region))
           != NULL)
       && _gvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0
@@ -4745,7 +4745,7 @@
     // copy_length is 0.
     if (!stopped() && dest_uninitialized) {
       Node* dest_length = alloc->in(AllocateNode::ALength);
-      if (_gvn.eqv_uncast(copy_length, dest_length)
+      if (copy_length->eqv_uncast(dest_length)
           || _gvn.find_int_con(dest_length, 1) <= 0) {
         // There is no zeroing to do. No need for a secondary raw memory barrier.
       } else {
@@ -4791,7 +4791,7 @@
     // with its attendant messy index arithmetic, and upgrade
     // the copy to a more hardware-friendly word size of 64 bits.
     Node* tail_ctl = NULL;
-    if (!stopped() && !_gvn.eqv_uncast(dest_tail, dest_length)) {
+    if (!stopped() && !dest_tail->eqv_uncast(dest_length)) {
       Node* cmp_lt   = _gvn.transform( new(C,3) CmpINode(dest_tail, dest_length) );
       Node* bol_lt   = _gvn.transform( new(C,2) BoolNode(cmp_lt, BoolTest::lt) );
       tail_ctl = generate_slow_guard(bol_lt, NULL);
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/locknode.cpp
--- a/src/share/vm/opto/locknode.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/locknode.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -49,18 +49,22 @@
 
 //-----------------------------hash--------------------------------------------
 uint BoxLockNode::hash() const {
+  if (EliminateNestedLocks)
+    return NO_HASH; // Each locked region has own BoxLock node
   return Node::hash() + _slot + (_is_eliminated ? Compile::current()->fixed_slots() : 0);
 }
 
 //------------------------------cmp--------------------------------------------
 uint BoxLockNode::cmp( const Node &n ) const {
+  if (EliminateNestedLocks)
+    return (&n == this); // Always fail except on self
   const BoxLockNode &bn = (const BoxLockNode &)n;
   return bn._slot == _slot && bn._is_eliminated == _is_eliminated;
 }
 
-OptoReg::Name BoxLockNode::stack_slot(Node* box_node) {
-  // Chase down the BoxNode
-  while (!box_node->is_BoxLock()) {
+BoxLockNode* BoxLockNode::box_node(Node* box) {
+  // Chase down the BoxNode after RA which may spill box nodes.
+  while (!box->is_BoxLock()) {
     //    if (box_node->is_SpillCopy()) {
     //      Node *m = box_node->in(1);
     //      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_StoreP) {
@@ -68,10 +72,64 @@
     //        continue;
     //      }
     //    }
-    assert(box_node->is_SpillCopy() || box_node->is_Phi(), "Bad spill of Lock.");
-    box_node = box_node->in(1);
+    assert(box->is_SpillCopy() || box->is_Phi(), "Bad spill of Lock.");
+    // Only BoxLock nodes with the same stack slot are merged.
+    // So it is enough to trace one path to find the slot value.
+    box = box->in(1);
   }
-  return box_node->in_RegMask(0).find_first_elem();
+  return box->as_BoxLock();
+}
+
+OptoReg::Name BoxLockNode::reg(Node* box) {
+  return box_node(box)->in_RegMask(0).find_first_elem();
+}
+
+// Is BoxLock node used for one simple lock region (same box and obj)?
+bool BoxLockNode::is_simple_lock_region(LockNode** unique_lock, Node* obj) {
+  LockNode* lock = NULL;
+  bool has_one_lock = false;
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    assert(!n->is_Phi(), "should not merge BoxLock nodes");
+    if (n->is_AbstractLock()) {
+      AbstractLockNode* alock = n->as_AbstractLock();
+      // Check lock's box since box could be referenced by Lock's debug info.
+      if (alock->box_node() == this) {
+        if (alock->obj_node()->eqv_uncast(obj)) {
+          if ((unique_lock != NULL) && alock->is_Lock()) {
+            if (lock == NULL) {
+              lock = alock->as_Lock();
+              has_one_lock = true;
+            } else if (lock != alock->as_Lock()) {
+              has_one_lock = false;
+            }
+          }
+        } else {
+          return false; // Different objects
+        }
+      }
+    }
+  }
+#ifdef ASSERT
+  // Verify that FastLock and Safepoint reference only this lock region.
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    if (n->is_FastLock()) {
+      FastLockNode* flock = n->as_FastLock();
+      assert((flock->box_node() == this) && flock->obj_node()->eqv_uncast(obj),"");
+    }
+    // Don't check monitor info in safepoints since the referenced object could
+    // be different from the locked object. It could be Phi node of different
+    // cast nodes which point to this locked object.
+    // We assume that no other objects could be referenced in monitor info
+    // associated with this BoxLock node because all associated locks and
+    // unlocks are reference only this one object.
+  }
+#endif
+  if (unique_lock != NULL && has_one_lock) {
+    *unique_lock = lock;
+  }
+  return true;
 }
 
 //=============================================================================
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/locknode.hpp
--- a/src/share/vm/opto/locknode.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/locknode.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -49,11 +49,11 @@
 
 //------------------------------BoxLockNode------------------------------------
 class BoxLockNode : public Node {
+  const int     _slot; // stack slot
+  RegMask     _inmask; // OptoReg corresponding to stack slot
+  bool _is_eliminated; // Associated locks were safely eliminated
+
 public:
-  const int _slot;
-  RegMask   _inmask;
-  bool _is_eliminated;    // indicates this lock was safely eliminated
-
   BoxLockNode( int lock );
   virtual int Opcode() const;
   virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
@@ -66,11 +66,19 @@
   virtual const class Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
   virtual uint ideal_reg() const { return Op_RegP; }
 
-  static OptoReg::Name stack_slot(Node* box_node);
+  static OptoReg::Name reg(Node* box_node);
+  static BoxLockNode* box_node(Node* box_node);
+  static bool same_slot(Node* box1, Node* box2) {
+    return box1->as_BoxLock()->_slot == box2->as_BoxLock()->_slot;
+  }
+  int stack_slot() const { return _slot; }
 
-  bool is_eliminated()  { return _is_eliminated; }
+  bool is_eliminated() const { return _is_eliminated; }
   // mark lock as eliminated.
-  void set_eliminated() { _is_eliminated = true; }
+  void set_eliminated()      { _is_eliminated = true; }
+
+  // Is BoxLock node used for one simple lock region?
+  bool is_simple_lock_region(LockNode** unique_lock, Node* obj);
 
 #ifndef PRODUCT
   virtual void format( PhaseRegAlloc *, outputStream *st ) const;
@@ -91,6 +99,7 @@
   }
   Node* obj_node() const { return in(1); }
   Node* box_node() const { return in(2); }
+  void  set_box_node(Node* box) { set_req(2, box); }
 
   // FastLock and FastUnlockNode do not hash, we need one for each correspoding
   // LockNode/UnLockNode to avoid creating Phi's.
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/loopnode.cpp
--- a/src/share/vm/opto/loopnode.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/loopnode.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -3278,16 +3278,7 @@
 #ifdef ASSERT
     if (legal->is_Start() && !early->is_Root()) {
       // Bad graph. Print idom path and fail.
-      tty->print_cr( "Bad graph detected in build_loop_late");
-      tty->print("n: ");n->dump(); tty->cr();
-      tty->print("early: ");early->dump(); tty->cr();
-      int ct = 0;
-      Node *dbg_legal = LCA;
-      while(!dbg_legal->is_Start() && ct < 100) {
-        tty->print("idom[%d] ",ct); dbg_legal->dump(); tty->cr();
-        ct++;
-        dbg_legal = idom(dbg_legal);
-      }
+      dump_bad_graph(n, early, LCA);
       assert(false, "Bad graph detected in build_loop_late");
     }
 #endif
@@ -3337,6 +3328,88 @@
     chosen_loop->_body.push(n);// Collect inner loops
 }
 
+#ifdef ASSERT
+void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) {
+  tty->print_cr( "Bad graph detected in build_loop_late");
+  tty->print("n: "); n->dump();
+  tty->print("early(n): "); early->dump();
+  if (n->in(0) != NULL  && !n->in(0)->is_top() &&
+      n->in(0) != early && !n->in(0)->is_Root()) {
+    tty->print("n->in(0): "); n->in(0)->dump();
+  }
+  for (uint i = 1; i < n->req(); i++) {
+    Node* in1 = n->in(i);
+    if (in1 != NULL && in1 != n && !in1->is_top()) {
+      tty->print("n->in(%d): ", i); in1->dump();
+      Node* in1_early = get_ctrl(in1);
+      tty->print("early(n->in(%d)): ", i); in1_early->dump();
+      if (in1->in(0) != NULL     && !in1->in(0)->is_top() &&
+          in1->in(0) != in1_early && !in1->in(0)->is_Root()) {
+        tty->print("n->in(%d)->in(0): ", i); in1->in(0)->dump();
+      }
+      for (uint j = 1; j < in1->req(); j++) {
+        Node* in2 = in1->in(j);
+        if (in2 != NULL && in2 != n && in2 != in1 && !in2->is_top()) {
+          tty->print("n->in(%d)->in(%d): ", i, j); in2->dump();
+          Node* in2_early = get_ctrl(in2);
+          tty->print("early(n->in(%d)->in(%d)): ", i, j); in2_early->dump();
+          if (in2->in(0) != NULL     && !in2->in(0)->is_top() &&
+              in2->in(0) != in2_early && !in2->in(0)->is_Root()) {
+            tty->print("n->in(%d)->in(%d)->in(0): ", i, j); in2->in(0)->dump();
+          }
+        }
+      }
+    }
+  }
+  tty->cr();
+  tty->print("LCA(n): "); LCA->dump();
+  for (uint i = 0; i < n->outcnt(); i++) {
+    Node* u1 = n->raw_out(i);
+    if (u1 == n)
+      continue;
+    tty->print("n->out(%d): ", i); u1->dump();
+    if (u1->is_CFG()) {
+      for (uint j = 0; j < u1->outcnt(); j++) {
+        Node* u2 = u1->raw_out(j);
+        if (u2 != u1 && u2 != n && u2->is_CFG()) {
+          tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
+        }
+      }
+    } else {
+      Node* u1_later = get_ctrl(u1);
+      tty->print("later(n->out(%d)): ", i); u1_later->dump();
+      if (u1->in(0) != NULL     && !u1->in(0)->is_top() &&
+          u1->in(0) != u1_later && !u1->in(0)->is_Root()) {
+        tty->print("n->out(%d)->in(0): ", i); u1->in(0)->dump();
+      }
+      for (uint j = 0; j < u1->outcnt(); j++) {
+        Node* u2 = u1->raw_out(j);
+        if (u2 == n || u2 == u1)
+          continue;
+        tty->print("n->out(%d)->out(%d): ", i, j); u2->dump();
+        if (!u2->is_CFG()) {
+          Node* u2_later = get_ctrl(u2);
+          tty->print("later(n->out(%d)->out(%d)): ", i, j); u2_later->dump();
+          if (u2->in(0) != NULL     && !u2->in(0)->is_top() &&
+              u2->in(0) != u2_later && !u2->in(0)->is_Root()) {
+            tty->print("n->out(%d)->in(0): ", i); u2->in(0)->dump();
+          }
+        }
+      }
+    }
+  }
+  tty->cr();
+  int ct = 0;
+  Node *dbg_legal = LCA;
+  while(!dbg_legal->is_Start() && ct < 100) {
+    tty->print("idom[%d] ",ct); dbg_legal->dump();
+    ct++;
+    dbg_legal = idom(dbg_legal);
+  }
+  tty->cr();
+}
+#endif
+
 #ifndef PRODUCT
 //------------------------------dump-------------------------------------------
 void PhaseIdealLoop::dump( ) const {
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/loopnode.hpp
--- a/src/share/vm/opto/loopnode.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/loopnode.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1040,6 +1040,10 @@
   bool created_loop_node()     { return _created_loop_node; }
   void register_new_node( Node *n, Node *blk );
 
+#ifdef ASSERT
+void dump_bad_graph(Node* n, Node* early, Node* LCA);
+#endif
+
 #ifndef PRODUCT
   void dump( ) const;
   void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/loopopts.cpp
--- a/src/share/vm/opto/loopopts.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/loopopts.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -819,6 +819,8 @@
     if( iff->is_If() ) {        // Classic split-if?
       if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
     } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
+      // Can't split CMove with different control edge.
+      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
       if( get_ctrl(iff->in(2)) == n_ctrl ||
           get_ctrl(iff->in(3)) == n_ctrl )
         return;                 // Inputs not yet split-up
@@ -937,7 +939,7 @@
       }
       bool did_break = (i < imax);  // Did we break out of the previous loop?
       if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
-        Node *late_load_ctrl;
+        Node *late_load_ctrl = NULL;
         if (n->is_Load()) {
           // If n is a load, get and save the result from get_late_ctrl(),
           // to be later used in calculating the control for n's clones.
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/macro.cpp
--- a/src/share/vm/opto/macro.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/macro.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1789,7 +1789,8 @@
                          slow_call_address);
 }
 
-//-----------------------mark_eliminated_locking_nodes-----------------------
+//-------------------mark_eliminated_box----------------------------------
+//
 // During EA obj may point to several objects but after few ideal graph
 // transformations (CCP) it may point to only one non escaping object
 // (but still using phi), corresponding locks and unlocks will be marked
@@ -1800,62 +1801,148 @@
 // marked for elimination since new obj has no escape information.
 // Mark all associated (same box and obj) lock and unlock nodes for
 // elimination if some of them marked already.
-void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
-  if (!alock->is_eliminated()) {
+void PhaseMacroExpand::mark_eliminated_box(Node* oldbox, Node* obj) {
+  if (oldbox->as_BoxLock()->is_eliminated())
+    return; // This BoxLock node was processed already.
+
+  // New implementation (EliminateNestedLocks) has separate BoxLock
+  // node for each locked region so mark all associated locks/unlocks as
+  // eliminated even if different objects are referenced in one locked region
+  // (for example, OSR compilation of nested loop inside locked scope).
+  if (EliminateNestedLocks ||
+      oldbox->as_BoxLock()->is_simple_lock_region(NULL, obj)) {
+    // Box is used only in one lock region. Mark this box as eliminated.
+    _igvn.hash_delete(oldbox);
+    oldbox->as_BoxLock()->set_eliminated(); // This changes box's hash value
+    _igvn.hash_insert(oldbox);
+
+    for (uint i = 0; i < oldbox->outcnt(); i++) {
+      Node* u = oldbox->raw_out(i);
+      if (u->is_AbstractLock() && !u->as_AbstractLock()->is_non_esc_obj()) {
+        AbstractLockNode* alock = u->as_AbstractLock();
+        // Check lock's box since box could be referenced by Lock's debug info.
+        if (alock->box_node() == oldbox) {
+          // Mark eliminated all related locks and unlocks.
+          alock->set_non_esc_obj();
+        }
+      }
+    }
     return;
   }
-  if (!alock->is_coarsened()) { // Eliminated by EA
-      // Create new "eliminated" BoxLock node and use it
-      // in monitor debug info for the same object.
-      BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
-      Node* obj = alock->obj_node();
-      if (!oldbox->is_eliminated()) {
-        BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
+
+  // Create new "eliminated" BoxLock node and use it in monitor debug info
+  // instead of oldbox for the same object.
+  BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
+
+  // Note: BoxLock node is marked eliminated only here and it is used
+  // to indicate that all associated lock and unlock nodes are marked
+  // for elimination.
+  newbox->set_eliminated();
+  transform_later(newbox);
+
+  // Replace old box node with new box for all users of the same object.
+  for (uint i = 0; i < oldbox->outcnt();) {
+    bool next_edge = true;
+
+    Node* u = oldbox->raw_out(i);
+    if (u->is_AbstractLock()) {
+      AbstractLockNode* alock = u->as_AbstractLock();
+      if (alock->box_node() == oldbox && alock->obj_node()->eqv_uncast(obj)) {
+        // Replace Box and mark eliminated all related locks and unlocks.
+        alock->set_non_esc_obj();
+        _igvn.hash_delete(alock);
+        alock->set_box_node(newbox);
+        _igvn._worklist.push(alock);
+        next_edge = false;
+      }
+    }
+    if (u->is_FastLock() && u->as_FastLock()->obj_node()->eqv_uncast(obj)) {
+      FastLockNode* flock = u->as_FastLock();
+      assert(flock->box_node() == oldbox, "sanity");
+      _igvn.hash_delete(flock);
+      flock->set_box_node(newbox);
+      _igvn._worklist.push(flock);
+      next_edge = false;
+    }
+
+    // Replace old box in monitor debug info.
+    if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
+      SafePointNode* sfn = u->as_SafePoint();
+      JVMState* youngest_jvms = sfn->jvms();
+      int max_depth = youngest_jvms->depth();
+      for (int depth = 1; depth <= max_depth; depth++) {
+        JVMState* jvms = youngest_jvms->of_depth(depth);
+        int num_mon  = jvms->nof_monitors();
+        // Loop over monitors
+        for (int idx = 0; idx < num_mon; idx++) {
+          Node* obj_node = sfn->monitor_obj(jvms, idx);
+          Node* box_node = sfn->monitor_box(jvms, idx);
+          if (box_node == oldbox && obj_node->eqv_uncast(obj)) {
+            int j = jvms->monitor_box_offset(idx);
+            _igvn.hash_delete(u);
+            u->set_req(j, newbox);
+            _igvn._worklist.push(u);
+            next_edge = false;
+          }
+        }
+      }
+    }
+    if (next_edge) i++;
+  }
+}
+
+//-----------------------mark_eliminated_locking_nodes-----------------------
+void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
+  if (EliminateNestedLocks) {
+    if (alock->is_nested()) {
+       assert(alock->box_node()->as_BoxLock()->is_eliminated(), "sanity");
+       return;
+    } else if (!alock->is_non_esc_obj()) { // Not eliminated or coarsened
+      // Only Lock node has JVMState needed here.
+      if (alock->jvms() != NULL && alock->as_Lock()->is_nested_lock_region()) {
+        // Mark eliminated related nested locks and unlocks.
+        Node* obj = alock->obj_node();
+        BoxLockNode* box_node = alock->box_node()->as_BoxLock();
+        assert(!box_node->is_eliminated(), "should not be marked yet");
         // Note: BoxLock node is marked eliminated only here
         // and it is used to indicate that all associated lock
         // and unlock nodes are marked for elimination.
-        newbox->set_eliminated();
-        transform_later(newbox);
-        // Replace old box node with new box for all users
-        // of the same object.
-        for (uint i = 0; i < oldbox->outcnt();) {
-
-          bool next_edge = true;
-          Node* u = oldbox->raw_out(i);
-          if (u->is_AbstractLock() &&
-              u->as_AbstractLock()->obj_node() == obj &&
-              u->as_AbstractLock()->box_node() == oldbox) {
-            // Mark all associated locks and unlocks.
-            u->as_AbstractLock()->set_eliminated();
-            _igvn.hash_delete(u);
-            u->set_req(TypeFunc::Parms + 1, newbox);
-            next_edge = false;
+        box_node->set_eliminated(); // Box's hash is always NO_HASH here
+        for (uint i = 0; i < box_node->outcnt(); i++) {
+          Node* u = box_node->raw_out(i);
+          if (u->is_AbstractLock()) {
+            alock = u->as_AbstractLock();
+            if (alock->box_node() == box_node) {
+              // Verify that this Box is referenced only by related locks.
+              assert(alock->obj_node()->eqv_uncast(obj), "");
+              // Mark all related locks and unlocks.
+              alock->set_nested();
+            }
           }
-          // Replace old box in monitor debug info.
-          if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
-            SafePointNode* sfn = u->as_SafePoint();
-            JVMState* youngest_jvms = sfn->jvms();
-            int max_depth = youngest_jvms->depth();
-            for (int depth = 1; depth <= max_depth; depth++) {
-              JVMState* jvms = youngest_jvms->of_depth(depth);
-              int num_mon  = jvms->nof_monitors();
-              // Loop over monitors
-              for (int idx = 0; idx < num_mon; idx++) {
-                Node* obj_node = sfn->monitor_obj(jvms, idx);
-                Node* box_node = sfn->monitor_box(jvms, idx);
-                if (box_node == oldbox && obj_node == obj) {
-                  int j = jvms->monitor_box_offset(idx);
-                  _igvn.hash_delete(u);
-                  u->set_req(j, newbox);
-                  next_edge = false;
-                }
-              } // for (int idx = 0;
-            } // for (int depth = 1;
-          } // if (u->is_SafePoint()
-          if (next_edge) i++;
-        } // for (uint i = 0; i < oldbox->outcnt();)
-      } // if (!oldbox->is_eliminated())
-  } // if (!alock->is_coarsened())
+        }
+      }
+      return;
+    }
+    // Process locks for non escaping object
+    assert(alock->is_non_esc_obj(), "");
+  } // EliminateNestedLocks
+
+  if (alock->is_non_esc_obj()) { // Lock is used for non escaping object
+    // Look for all locks of this object and mark them and
+    // corresponding BoxLock nodes as eliminated.
+    Node* obj = alock->obj_node();
+    for (uint j = 0; j < obj->outcnt(); j++) {
+      Node* o = obj->raw_out(j);
+      if (o->is_AbstractLock() &&
+          o->as_AbstractLock()->obj_node()->eqv_uncast(obj)) {
+        alock = o->as_AbstractLock();
+        Node* box = alock->box_node();
+        // Replace old box node with new eliminated box for all users
+        // of the same object and mark related locks as eliminated.
+        mark_eliminated_box(box, obj);
+      }
+    }
+  }
 }
 
 // we have determined that this lock/unlock can be eliminated, we simply
@@ -1870,7 +1957,7 @@
     return false;
   }
 #ifdef ASSERT
-  if (alock->is_Lock() && !alock->is_coarsened()) {
+  if (!alock->is_coarsened()) {
     // Check that new "eliminated" BoxLock node is created.
     BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
     assert(oldbox->is_eliminated(), "should be done already");
@@ -1962,6 +2049,8 @@
   Node* box = lock->box_node();
   Node* flock = lock->fastlock_node();
 
+  assert(!box->as_BoxLock()->is_eliminated(), "sanity");
+
   // Make the merge point
   Node *region;
   Node *mem_phi;
@@ -2196,6 +2285,8 @@
   Node* obj = unlock->obj_node();
   Node* box = unlock->box_node();
 
+  assert(!box->as_BoxLock()->is_eliminated(), "sanity");
+
   // No need for a null check on unlock
 
   // Make the merge point
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/macro.hpp
--- a/src/share/vm/opto/macro.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/macro.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -92,6 +92,7 @@
   void process_users_of_allocation(AllocateNode *alloc);
 
   void eliminate_card_mark(Node *cm);
+  void mark_eliminated_box(Node* box, Node* obj);
   void mark_eliminated_locking_nodes(AbstractLockNode *alock);
   bool eliminate_locking_node(AbstractLockNode *alock);
   void expand_lock_node(LockNode *lock);
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/memnode.cpp
--- a/src/share/vm/opto/memnode.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/memnode.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -2201,7 +2201,7 @@
   // unsafe if I have intervening uses...  Also disallowed for StoreCM
   // since they must follow each StoreP operation.  Redundant StoreCMs
   // are eliminated just before matching in final_graph_reshape.
-  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address) &&
+  if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) &&
       mem->Opcode() != Op_StoreCM) {
     // Looking at a dead closed cycle of memory?
     assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
@@ -2274,16 +2274,16 @@
 
   // Load then Store?  Then the Store is useless
   if (val->is_Load() &&
-      phase->eqv_uncast( val->in(MemNode::Address), adr ) &&
-      phase->eqv_uncast( val->in(MemNode::Memory ), mem ) &&
+      val->in(MemNode::Address)->eqv_uncast(adr) &&
+      val->in(MemNode::Memory )->eqv_uncast(mem) &&
       val->as_Load()->store_Opcode() == Opcode()) {
     return mem;
   }
 
   // Two stores in a row of the same value?
   if (mem->is_Store() &&
-      phase->eqv_uncast( mem->in(MemNode::Address), adr ) &&
-      phase->eqv_uncast( mem->in(MemNode::ValueIn), val ) &&
+      mem->in(MemNode::Address)->eqv_uncast(adr) &&
+      mem->in(MemNode::ValueIn)->eqv_uncast(val) &&
       mem->Opcode() == Opcode()) {
     return mem;
   }
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/node.cpp
--- a/src/share/vm/opto/node.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/node.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -833,8 +833,20 @@
 
 //---------------------------uncast_helper-------------------------------------
 Node* Node::uncast_helper(const Node* p) {
-  uint max_depth = 3;
-  for (uint i = 0; i < max_depth; i++) {
+#ifdef ASSERT
+  uint depth_count = 0;
+  const Node* orig_p = p;
+#endif
+
+  while (true) {
+#ifdef ASSERT
+    if (depth_count >= K) {
+      orig_p->dump(4);
+      if (p != orig_p)
+        p->dump(1);
+    }
+    assert(depth_count++ < K, "infinite loop in Node::uncast_helper");
+#endif
     if (p == NULL || p->req() != 2) {
       break;
     } else if (p->is_ConstraintCast()) {
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/node.hpp
--- a/src/share/vm/opto/node.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/node.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -429,6 +429,10 @@
 
   // Strip away casting.  (It is depth-limited.)
   Node* uncast() const;
+  // Return whether two Nodes are equivalent, after stripping casting.
+  bool eqv_uncast(const Node* n) const {
+    return (this->uncast() == n->uncast());
+  }
 
 private:
   static Node* uncast_helper(const Node* n);
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/output.cpp
--- a/src/share/vm/opto/output.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/output.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -924,10 +924,10 @@
         scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
       }
 
-      OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node);
+      OptoReg::Name box_reg = BoxLockNode::reg(box_node);
       Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
-      while( !box_node->is_BoxLock() )  box_node = box_node->in(1);
-      monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated()));
+      bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
+      monarray->append(new MonitorValue(scval, basic_lock, eliminated));
     }
 
     // We dump the object pool first, since deoptimization reads it in first.
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/parse1.cpp
--- a/src/share/vm/opto/parse1.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/parse1.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -1604,7 +1604,16 @@
           continue;
         default:                // All normal stuff
           if (phi == NULL) {
-            if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
+            const JVMState* jvms = map()->jvms();
+            if (EliminateNestedLocks &&
+                jvms->is_mon(j) && jvms->is_monitor_box(j)) {
+              // BoxLock nodes are not commoning.
+              // Use old BoxLock node as merged box.
+              assert(newin->jvms()->is_monitor_box(j), "sanity");
+              // This assert also tests that nodes are BoxLock.
+              assert(BoxLockNode::same_slot(n, m), "sanity");
+              C->gvn_replace_by(n, m);
+            } else if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
               phi = ensure_phi(j, nophi);
             }
           }
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/phaseX.hpp
--- a/src/share/vm/opto/phaseX.hpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/phaseX.hpp	Tue Jan 17 21:25:28 2012 -0500
@@ -256,11 +256,6 @@
   // For pessimistic optimizations this is simply pointer equivalence.
   bool eqv(const Node* n1, const Node* n2) const { return n1 == n2; }
 
-  // Return whether two Nodes are equivalent, after stripping casting.
-  bool eqv_uncast(const Node* n1, const Node* n2) const {
-    return eqv(n1->uncast(), n2->uncast());
-  }
-
   // For pessimistic passes, the return type must monotonically narrow.
   // For optimistic  passes, the return type must monotonically widen.
   // It is possible to get into a "death march" in either type of pass,
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/postaloc.cpp
--- a/src/share/vm/opto/postaloc.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/postaloc.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -89,32 +89,62 @@
   return blk_adjust;
 }
 
+#ifdef ASSERT
+static bool expected_yanked_node(Node *old, Node *orig_old) {
+  // This code is expected only next original nodes:
+  // - load from constant table node which may have next data input nodes:
+  //     MachConstantBase, Phi, MachTemp, MachSpillCopy
+  // - load constant node which may have next data input nodes:
+  //     MachTemp, MachSpillCopy
+  // - MachSpillCopy
+  // - MachProj and Copy dead nodes
+  if (old->is_MachSpillCopy()) {
+    return true;
+  } else if (old->is_Con()) {
+    return true;
+  } else if (old->is_MachProj()) { // Dead kills projection of Con node
+    return (old == orig_old);
+  } else if (old->is_Copy()) {     // Dead copy of a callee-save value
+    return (old == orig_old);
+  } else if (old->is_MachTemp()) {
+    return orig_old->is_Con();
+  } else if (old->is_Phi() || old->is_MachConstantBase()) {
+    return (orig_old->is_Con() && orig_old->is_MachConstant());
+  }
+  return false;
+}
+#endif
+
 //------------------------------yank_if_dead-----------------------------------
-// Removed an edge from 'old'.  Yank if dead.  Return adjustment counts to
+// Removed edges from 'old'.  Yank if dead.  Return adjustment counts to
 // iterators in the current block.
-int PhaseChaitin::yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
+int PhaseChaitin::yank_if_dead_recurse(Node *old, Node *orig_old, Block *current_block,
+                                       Node_List *value, Node_List *regnd) {
   int blk_adjust=0;
-  while (old->outcnt() == 0 && old != C->top()) {
+  if (old->outcnt() == 0 && old != C->top()) {
+#ifdef ASSERT
+    if (!expected_yanked_node(old, orig_old)) {
+      tty->print_cr("==============================================");
+      tty->print_cr("orig_old:");
+      orig_old->dump();
+      tty->print_cr("old:");
+      old->dump();
+      assert(false, "unexpected yanked node");
+    }
+    if (old->is_Con())
+      orig_old = old; // Reset to satisfy expected nodes checks.
+#endif
     blk_adjust += yank(old, current_block, value, regnd);
 
-    Node *tmp = NULL;
     for (uint i = 1; i < old->req(); i++) {
-      if (old->in(i)->is_MachTemp()) {
-        // handle TEMP inputs
-        Node* machtmp = old->in(i);
-        if (machtmp->outcnt() == 1) {
-          assert(machtmp->unique_out() == old, "sanity");
-          blk_adjust += yank(machtmp, current_block, value, regnd);
-          machtmp->disconnect_inputs(NULL);
-        }
-      } else {
-        assert(tmp == NULL, "can't handle more non MachTemp inputs");
-        tmp = old->in(i);
+      Node* n = old->in(i);
+      if (n != NULL) {
+        old->set_req(i, NULL);
+        blk_adjust += yank_if_dead_recurse(n, orig_old, current_block, value, regnd);
       }
     }
+    // Disconnect control and remove precedence edges if any exist
     old->disconnect_inputs(NULL);
-    if( !tmp ) break;
-    old = tmp;
   }
   return blk_adjust;
 }
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/opto/subnode.cpp
--- a/src/share/vm/opto/subnode.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/opto/subnode.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -91,7 +91,7 @@
 
   // Not correct for SubFnode and AddFNode (must check for infinity)
   // Equal?  Subtract is zero
-  if (phase->eqv_uncast(in1, in2))  return add_id();
+  if (in1->eqv_uncast(in2))  return add_id();
 
   // Either input is BOTTOM ==> the result is the local BOTTOM
   if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/runtime/arguments.cpp
--- a/src/share/vm/runtime/arguments.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/runtime/arguments.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -3160,6 +3160,9 @@
   if (!UseBiasedLocking || EmitSync != 0) {
     UseOptoBiasInlining = false;
   }
+  if (!EliminateLocks) {
+    EliminateNestedLocks = false;
+  }
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/runtime/deoptimization.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -211,7 +211,7 @@
 #ifdef COMPILER2
   // Reallocate the non-escaping objects and restore their fields. Then
   // relock objects if synchronization on them was eliminated.
-  if (DoEscapeAnalysis) {
+  if (DoEscapeAnalysis || EliminateNestedLocks) {
     if (EliminateAllocations) {
       assert (chunk->at(0)->scope() != NULL,"expect only compiled java frames");
       GrowableArray<ScopeValue*>* objects = chunk->at(0)->scope()->objects();
diff -r d7e3846464d0 -r 6520f9861937 src/share/vm/runtime/vmStructs.cpp
--- a/src/share/vm/runtime/vmStructs.cpp	Tue Jan 17 13:08:52 2012 -0500
+++ b/src/share/vm/runtime/vmStructs.cpp	Tue Jan 17 21:25:28 2012 -0500
@@ -307,7 +307,7 @@
   nonstatic_field(instanceKlass,               _static_field_size,                            int)                                   \
   nonstatic_field(instanceKlass,               _static_oop_field_count,                       u2)                                   \
   nonstatic_field(instanceKlass,               _nonstatic_oop_map_size,                       int)                                   \
-  nonstatic_field(instanceKlass,               _misc_flags,                                   u1)                                    \
+  nonstatic_field(instanceKlass,               _is_marked_dependent,                          bool)                                  \
   nonstatic_field(instanceKlass,               _minor_version,                                u2)                                    \
   nonstatic_field(instanceKlass,               _major_version,                                u2)                                    \
   nonstatic_field(instanceKlass,               _init_state,                                   u1)                                    \
@@ -2386,7 +2386,6 @@
   declare_constant(instanceKlass::being_initialized)                      \
   declare_constant(instanceKlass::fully_initialized)                      \
   declare_constant(instanceKlass::initialization_error)                   \
-  declare_constant(instanceKlass::IS_MARKED_DEPENDENT)                    \
                                                                           \
   /*********************************/                                     \
   /* Symbol* - symbol max length */                                     \
diff -r d7e3846464d0 -r 6520f9861937 test/compiler/7116216/StackOverflow.java
--- a/test/compiler/7116216/StackOverflow.java	Tue Jan 17 13:08:52 2012 -0500
+++ b/test/compiler/7116216/StackOverflow.java	Tue Jan 17 21:25:28 2012 -0500
@@ -30,7 +30,7 @@
  * @run main/othervm -Xcomp -Xbatch StackOverflow
  */
 
-class StackOverflow {
+public class StackOverflow {
     static String stackOverflow_largeFrame_liveOopForGC;
 
     public static int stackOverflow_largeFrame(int call_count, String liveOopForGC) {