changeset 4787:2ace1c4ee8da

6888336: G1: avoid explicitly marking and pushing objects in survivor spaces
Summary: This change simplifies the interaction between GC and concurrent marking. By disabling survivor spaces during the initial-mark pause we don't need to propagate marks of objects we copy during each GC (since we never need to copy an explicitly marked object).
Reviewed-by: johnc, brutisso
author tonyp
date Tue, 10 Jan 2012 18:58:13 -0500
parents 1d6185f732aa
children 9d4f4a1825e4
files src/share/vm/gc_implementation/g1/concurrentMark.cpp src/share/vm/gc_implementation/g1/concurrentMark.hpp src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp src/share/vm/gc_implementation/g1/g1EvacFailure.hpp src/share/vm/gc_implementation/g1/g1OopClosures.hpp src/share/vm/gc_implementation/g1/heapRegion.cpp src/share/vm/gc_implementation/g1/heapRegion.hpp src/share/vm/gc_implementation/g1/heapRegion.inline.hpp src/share/vm/gc_implementation/g1/ptrQueue.hpp src/share/vm/gc_implementation/g1/satbQueue.cpp src/share/vm/gc_implementation/g1/satbQueue.hpp
diffstat 15 files changed, 815 insertions(+), 448 deletions(-)
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Tue Jan 10 18:58:13 2012 -0500
@@ -31,6 +31,7 @@
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
@@ -183,12 +184,11 @@
 void CMMarkStack::allocate(size_t size) {
   _base = NEW_C_HEAP_ARRAY(oop, size);
   if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate "
-                                  "CM region mark stack");
+    vm_exit_during_initialization("Failed to allocate CM region mark stack");
   }
   _index = 0;
   _capacity = (jint) size;
-  _oops_do_bound = -1;
+  _saved_index = -1;
   NOT_PRODUCT(_max_depth = 0);
 }
 
@@ -283,7 +283,6 @@
   }
 }
 
-
 CMRegionStack::CMRegionStack() : _base(NULL) {}
 
 void CMRegionStack::allocate(size_t size) {
@@ -302,6 +301,8 @@
 }
 
 void CMRegionStack::push_lock_free(MemRegion mr) {
+  guarantee(false, "push_lock_free(): don't call this any more");
+
   assert(mr.word_size() > 0, "Precondition");
   while (true) {
     jint index = _index;
@@ -325,6 +326,8 @@
 // marking / remark phases. Should only be called in tandem with
 // other lock-free pops.
 MemRegion CMRegionStack::pop_lock_free() {
+  guarantee(false, "pop_lock_free(): don't call this any more");
+
   while (true) {
     jint index = _index;
 
@@ -390,6 +393,8 @@
 #endif
 
 bool CMRegionStack::invalidate_entries_into_cset() {
+  guarantee(false, "invalidate_entries_into_cset(): don't call this any more");
+
   bool result = false;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   for (int i = 0; i < _oops_do_bound; ++i) {
@@ -438,14 +443,29 @@
   return res;
 }
 
+void CMMarkStack::note_start_of_gc() {
+  assert(_saved_index == -1,
+         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
+  _saved_index = _index;
+}
+
+void CMMarkStack::note_end_of_gc() {
+  // This is intentionally a guarantee, instead of an assert. If we
+  // accidentally add something to the mark stack during GC, it
+  // will be a correctness issue so it's better if we crash. We'll
+  // only check this once per GC anyway, so it won't be a performance
+  // issue in any way.
+  guarantee(_saved_index == _index,
+            err_msg("saved index: %d index: %d", _saved_index, _index));
+  _saved_index = -1;
+}
+
 void CMMarkStack::oops_do(OopClosure* f) {
-  if (_index == 0) return;
-  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
-         "Bound must be set.");
-  for (int i = 0; i < _oops_do_bound; i++) {
+  assert(_saved_index == _index,
+         err_msg("saved index: %d index: %d", _saved_index, _index));
+  for (int i = 0; i < _index; i += 1) {
     f->do_oop(&_base[i]);
   }
-  _oops_do_bound = -1;
 }
 
 bool ConcurrentMark::not_yet_marked(oop obj) const {
@@ -783,7 +803,7 @@
 public:
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
-      r->note_start_of_marking(true);
+      r->note_start_of_marking();
     }
     return false;
   }
@@ -804,6 +824,10 @@
 
   // Initialise marking structures. This has to be done in a STW phase.
   reset();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
 }
 
 
@@ -818,10 +842,6 @@
   // every remark and we'll eventually not need to cause one.
   force_overflow_stw()->init();
 
-  // For each region note start of marking.
-  NoteStartOfMarkHRClosure startcl;
-  g1h->heap_region_iterate(&startcl);
-
   // Start Concurrent Marking weak-reference discovery.
   ReferenceProcessor* rp = g1h->ref_processor_cm();
   // enable ("weak") refs discovery
@@ -946,22 +966,9 @@
 }
 #endif // !PRODUCT
 
-void ConcurrentMark::grayRoot(oop p) {
-  HeapWord* addr = (HeapWord*) p;
-  // We can't really check against _heap_start and _heap_end, since it
-  // is possible during an evacuation pause with piggy-backed
-  // initial-mark that the committed space is expanded during the
-  // pause without CM observing this change. So the assertions below
-  // is a bit conservative; but better than nothing.
-  assert(_g1h->g1_committed().contains(addr),
-         "address should be within the heap bounds");
-
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    _nextMarkBitMap->parMark(addr);
-  }
-}
-
 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+  guarantee(false, "grayRegionIfNecessary(): don't call this any more");
+
   // The objects on the region have already been marked "in bulk" by
   // the caller. We only need to decide whether to push the region on
   // the region stack or not.
@@ -1007,6 +1014,8 @@
 }
 
 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+  guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");
+
   // The object is not marked by the caller. We need to at least mark
   // it and maybe push in on the stack.
 
@@ -1224,7 +1233,6 @@
                                        true /* expected_active */);
 
     if (VerifyDuringGC) {
-
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
@@ -1879,10 +1887,6 @@
   double end = os::elapsedTime();
   _cleanup_times.add((end - start) * 1000.0);
 
-  // G1CollectedHeap::heap()->print();
-  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
-  // G1CollectedHeap::heap()->get_gc_time_stamp());
-
   if (PrintGC || PrintGCDetails) {
     g1h->print_size_transition(gclog_or_tty,
                                start_used_bytes,
@@ -2669,6 +2673,8 @@
 }
 
 void ConcurrentMark::drainAllSATBBuffers() {
+  guarantee(false, "drainAllSATBBuffers(): don't call this any more");
+
   CMGlobalObjectClosure oc(this);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   satb_mq_set.set_closure(&oc);
@@ -2687,12 +2693,6 @@
   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
-void ConcurrentMark::markPrev(oop p) {
-  // Note we are overriding the read-only view of the prev map here, via
-  // the cast.
-  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
-}
-
 void ConcurrentMark::clear(oop p) {
   assert(p != NULL && p->is_oop(), "expected an oop");
   HeapWord* addr = (HeapWord*)p;
@@ -2702,13 +2702,21 @@
   _nextMarkBitMap->clear(addr);
 }
 
-void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+}
+
+void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
   _nextMarkBitMap->clearRange(mr);
 }
 
+void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
+  clearRangePrevBitmap(mr);
+  clearRangeNextBitmap(mr);
+}
+
 HeapRegion*
 ConcurrentMark::claim_region(int task_num) {
   // "checkpoint" the finger
@@ -2803,6 +2811,9 @@
 }
 
 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+  guarantee(false, "invalidate_aborted_regions_in_cset(): "
+                   "don't call this any more");
+
   bool result = false;
   for (int i = 0; i < (int)_max_task_num; ++i) {
     CMTask* the_task = _tasks[i];
@@ -2854,24 +2865,135 @@
     // ...then over the contents of the all the task queues.
     queue->oops_do(cl);
   }
-
-  // Invalidate any entries, that are in the region stack, that
-  // point into the collection set
-  if (_regionStack.invalidate_entries_into_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+}
+
+#ifndef PRODUCT
+enum VerifyNoCSetOopsPhase {
+  VerifyNoCSetOopsStack,
+  VerifyNoCSetOopsQueues,
+  VerifyNoCSetOopsSATBCompleted,
+  VerifyNoCSetOopsSATBThread
+};
+
+class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyNoCSetOopsPhase _phase;
+  int _info;
+
+  const char* phase_str() {
+    switch (_phase) {
+    case VerifyNoCSetOopsStack:         return "Stack";
+    case VerifyNoCSetOopsQueues:        return "Queue";
+    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
+    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
+    default:                            ShouldNotReachHere();
+    }
+    return NULL;
+  }
+
+  void do_object_work(oop obj) {
+    guarantee(!_g1h->obj_in_cs(obj),
+              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
+                      (void*) obj, phase_str(), _info));
+  }
+
+public:
+  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
+
+  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
+    _phase = phase;
+    _info = info;
+  }
+
+  virtual void do_oop(oop* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    do_object_work(obj);
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    // We should not come across narrow oops while scanning marking
+    // stacks and SATB buffers.
+    ShouldNotReachHere();
+  }
+
+  virtual void do_object(oop obj) {
+    do_object_work(obj);
+  }
+};
+
+void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
+                                         bool verify_enqueued_buffers,
+                                         bool verify_thread_buffers,
+                                         bool verify_fingers) {
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+  if (!G1CollectedHeap::heap()->mark_in_progress()) {
+    return;
   }
 
-  // Invalidate any aborted regions, recorded in the individual CM
-  // tasks, that point into the collection set.
-  if (invalidate_aborted_regions_in_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+  VerifyNoCSetOopsClosure cl;
+
+  if (verify_stacks) {
+    // Verify entries on the global mark stack
+    cl.set_phase(VerifyNoCSetOopsStack);
+    _markStack.oops_do(&cl);
+
+    // Verify entries on the task queues
+    for (int i = 0; i < (int) _max_task_num; i += 1) {
+      cl.set_phase(VerifyNoCSetOopsQueues, i);
+      OopTaskQueue* queue = _task_queues->queue(i);
+      queue->oops_do(&cl);
+    }
+  }
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+
+  // Verify entries on the enqueued SATB buffers
+  if (verify_enqueued_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
+    satb_qs.iterate_completed_buffers_read_only(&cl);
+  }
+
+  // Verify entries on the per-thread SATB buffers
+  if (verify_thread_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBThread);
+    satb_qs.iterate_thread_buffers_read_only(&cl);
   }
 
+  if (verify_fingers) {
+    // Verify the global finger
+    HeapWord* global_finger = finger();
+    if (global_finger != NULL && global_finger < _heap_end) {
+      // The global finger always points to a heap region boundary. We
+      // use heap_region_containing_raw() to get the containing region
+      // given that the global finger could be pointing to a free region
+      // which subsequently becomes continues humongous. If that
+      // happens, heap_region_containing() will return the bottom of the
+      // corresponding starts humongous region and the check below will
+      // not hold any more.
+      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
+      guarantee(global_finger == global_hr->bottom(),
+                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
+                        global_finger, HR_FORMAT_PARAMS(global_hr)));
+    }
+
+    // Verify the task fingers
+    assert(parallel_marking_threads() <= _max_task_num, "sanity");
+    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
+      CMTask* task = _tasks[i];
+      HeapWord* task_finger = task->finger();
+      if (task_finger != NULL && task_finger < _heap_end) {
+        // See above note on the global finger verification.
+        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
+        guarantee(task_finger == task_hr->bottom() ||
+                  !task_hr->in_collection_set(),
+                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
+                          task_finger, HR_FORMAT_PARAMS(task_hr)));
+      }
+    }
+  }
 }
+#endif // PRODUCT
 
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
@@ -3099,6 +3221,9 @@
 };
 
 void ConcurrentMark::complete_marking_in_collection_set() {
+  guarantee(false, "complete_marking_in_collection_set(): "
+                   "don't call this any more");
+
   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
 
   if (!g1h->mark_in_progress()) {
@@ -3146,6 +3271,8 @@
 // newCSet().
 
 void ConcurrentMark::newCSet() {
+  guarantee(false, "newCSet(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) {
     // nothing to do if marking is not in progress
     return;
@@ -3184,6 +3311,8 @@
 }
 
 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  guarantee(false, "registerCSetRegion(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) return;
 
   HeapWord* region_end = hr->end();
@@ -3195,6 +3324,9 @@
 // Resets the region fields of active CMTasks whose values point
 // into the collection set.
 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
+  guarantee(false, "reset_active_task_region_fields_in_cset(): "
+                   "don't call this any more");
+
   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
   assert(parallel_marking_threads() <= _max_task_num, "sanity");
 
@@ -3905,6 +4037,10 @@
 }
 
 void CMTask::drain_region_stack(BitMapClosure* bc) {
+  assert(_cm->region_stack_empty(), "region stack should be empty");
+  assert(_aborted_region.is_empty(), "aborted region should be empty");
+  return;
+
   if (has_aborted()) return;
 
   assert(_region_finger == NULL,
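
A note on the change above: the new note_start_of_gc() / note_end_of_gc() pair replaces the old _oops_do_bound protocol. Instead of bounding iteration to a recorded index, the pause now simply guarantees that the mark stack does not grow at all. A minimal sketch of that bracketing idea, using a hypothetical MarkStackGuard class rather than the HotSpot types:

    #include <cassert>
    #include <cstdio>
    #include <cstdlib>

    // Hypothetical sketch: record the stack index when a pause starts and
    // verify it is unchanged when the pause ends, so an accidental push
    // during GC fails fast instead of silently corrupting marking state.
    class MarkStackGuard {
      int _index;        // one more than the last occupied slot
      int _saved_index;  // index recorded at the start of the pause, else -1
    public:
      MarkStackGuard() : _index(0), _saved_index(-1) { }
      void push() { _index += 1; }   // stand-in for a real mark-stack push

      void note_start_of_gc() {
        assert(_saved_index == -1 && "start/end of GC bracketed incorrectly");
        _saved_index = _index;
      }
      void note_end_of_gc() {
        // Checked in all builds (a guarantee, not an assert): a push
        // during GC is a correctness bug, so crashing is the safe choice.
        if (_saved_index != _index) {
          std::fprintf(stderr, "saved index: %d index: %d\n",
                       _saved_index, _index);
          std::abort();
        }
        _saved_index = -1;
      }
    };
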
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,10 +166,10 @@
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
-  oop*   _base;      // bottom of stack
-  jint   _index;     // one more than last occupied index
-  jint   _capacity;  // max #elements
-  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  oop*   _base;        // bottom of stack
+  jint   _index;       // one more than last occupied index
+  jint   _capacity;    // max #elements
+  jint   _saved_index; // value of _index saved at start of GC
   NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
 
   bool   _overflow;
@@ -247,16 +247,12 @@
 
   void setEmpty()   { _index = 0; clear_overflow(); }
 
-  // Record the current size; a subsequent "oops_do" will iterate only over
-  // indices valid at the time of this call.
-  void set_oops_do_bound(jint bound = -1) {
-    if (bound == -1) {
-      _oops_do_bound = _index;
-    } else {
-      _oops_do_bound = bound;
-    }
-  }
-  jint oops_do_bound() { return _oops_do_bound; }
+  // Record the current index.
+  void note_start_of_gc();
+
+  // Make sure that we have not added any entries to the stack during GC.
+  void note_end_of_gc();
+
   // iterate over the oops in the mark stack, up to the bound recorded via
   // the call above.
   void oops_do(OopClosure* f);
@@ -724,10 +720,9 @@
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed and it's MT-safe. Currently, we just mark it. But, in the
-  // future, we can experiment with pushing it on the stack and we can
-  // do this without changing G1CollectedHeap.
-  void grayRoot(oop p);
+  // grayed. It is MT-safe.
+  inline void grayRoot(oop obj, size_t word_size);
+
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
   // marked any objects on that region. If _should_gray_objects is
@@ -735,6 +730,7 @@
   // pushed on the region stack, if it is located below the global
   // finger, otherwise we do nothing.
   void grayRegionIfNecessary(MemRegion mr);
+
   // It's used during evacuation pauses to mark and, if necessary,
   // gray a single object and it's MT-safe. It assumes the caller did
   // not mark the object. If _should_gray_objects is true and we're
@@ -791,24 +787,40 @@
 
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
-  void markPrev(oop p);
+  inline void markPrev(oop p);
+  inline void markNext(oop p);
   void clear(oop p);
-  // Clears marks for all objects in the given range, for both prev and
-  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
-  // this carefully!
-  void clearRangeBothMaps(MemRegion mr);
+  // Clears marks for all objects in the given range, for the prev,
+  // next, or both bitmaps.  NB: the previous bitmap is usually
+  // read-only, so use this carefully!
+  void clearRangePrevBitmap(MemRegion mr);
+  void clearRangeNextBitmap(MemRegion mr);
+  void clearRangeBothBitmaps(MemRegion mr);
 
-  // Record the current top of the mark and region stacks; a
-  // subsequent oops_do() on the mark stack and
-  // invalidate_entries_into_cset() on the region stack will iterate
-  // only over indices valid at the time of this call.
-  void set_oops_do_bound() {
-    _markStack.set_oops_do_bound();
-    _regionStack.set_oops_do_bound();
+  // Notify data structures that a GC has started.
+  void note_start_of_gc() {
+    _markStack.note_start_of_gc();
   }
+
+  // Notify data structures that a GC is finished.
+  void note_end_of_gc() {
+    _markStack.note_end_of_gc();
+  }
+
   // Iterate over the oops in the mark stack and all local queues. It
   // also calls invalidate_entries_into_cset() on the region stack.
   void oops_do(OopClosure* f);
+
+  // Verify that there are no CSet oops on the stacks (taskqueues /
+  // global mark stack), enqueued SATB buffers, per-thread SATB
+  // buffers, and fingers (global / per-task). The boolean parameters
+  // decide which of the above data structures to verify. If marking
+  // is not in progress, it's a no-op.
+  void verify_no_cset_oops(bool verify_stacks,
+                           bool verify_enqueued_buffers,
+                           bool verify_thread_buffers,
+                           bool verify_fingers) PRODUCT_RETURN;
+
   // It is called at the end of an evacuation pause during marking so
   // that CM is notified of where the new end of the heap is. It
   // doesn't do anything if concurrent_marking_in_progress() is false,
@@ -1166,6 +1178,7 @@
   // It keeps picking SATB buffers and processing them until no SATB
   // buffers are available.
   void drain_satb_buffers();
+
   // It keeps popping regions from the region stack and processing
   // them until the region stack is empty.
   void drain_region_stack(BitMapClosure* closure);
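
verify_no_cset_oops() above is declared with PRODUCT_RETURN so that the (potentially expensive) checks disappear in product builds while callers stay free of #ifdefs. A self-contained sketch of that idiom, with a hypothetical macro whose assumed semantics mirror HotSpot's (empty body in product builds, out-of-line definition otherwise):

    // Hypothetical macro mirroring the assumed PRODUCT_RETURN semantics:
    // product builds give the declaration an empty body; other builds
    // leave a plain declaration, defined out of line below.
    #ifdef PRODUCT
    #define SKETCH_PRODUCT_RETURN {}
    #else
    #define SKETCH_PRODUCT_RETURN /* trailing ';' ends the declaration */
    #endif

    struct Verifier {
      void verify_no_cset_oops(bool verify_stacks,
                               bool verify_enqueued_buffers,
                               bool verify_thread_buffers,
                               bool verify_fingers) SKETCH_PRODUCT_RETURN;
    };

    #ifndef PRODUCT
    void Verifier::verify_no_cset_oops(bool, bool, bool, bool) {
      // debug-only checks over stacks, SATB buffers, and fingers go here
    }
    #endif
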
--- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -153,4 +153,46 @@
   }
 }
 
+inline void ConcurrentMark::markPrev(oop p) {
+  assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::markNext(oop p) {
+  assert(!_nextMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  _nextMarkBitMap->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size) {
+  HeapWord* addr = (HeapWord*) obj;
+
+  // Currently we don't do anything with word_size but we will use it
+  // in the very near future in the liveness calculation piggy-backing
+  // changes.
+
+#ifdef ASSERT
+  HeapRegion* hr = _g1h->heap_region_containing(addr);
+  assert(hr != NULL, "sanity");
+  assert(!hr->is_survivor(), "should not allocate survivors during IM");
+  assert(addr < hr->next_top_at_mark_start(),
+         err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
+                 addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
+  // We cannot assert that word_size == obj->size() given that obj
+  // might not be in a consistent state (another thread might be in
+  // the process of copying it). So the best thing we can do is to
+  // assert that word_size is under an upper bound which is its
+  // containing region's capacity.
+  assert(word_size * HeapWordSize <= hr->capacity(),
+         err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
+                 word_size * HeapWordSize, hr->capacity(),
+                 HR_FORMAT_PARAMS(hr)));
+#endif // ASSERT
+
+  if (!_nextMarkBitMap->isMarked(addr)) {
+    _nextMarkBitMap->parMark(addr);
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
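
grayRoot() above deliberately tests the bitmap before calling parMark(): the racy read is cheap, and losing a race is harmless because setting a mark bit twice is idempotent. A minimal sketch of that check-then-atomically-mark pattern over a hypothetical stand-alone bitmap (std::atomic here; the real CMBitMap has its own primitives):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Hypothetical parallel bitmap: a relaxed read filters out bits that
    // are already set, and fetch_or makes the actual mark atomic.
    class ParBitMap {
      std::atomic<uint64_t>* _words;  // backing storage, one bit per slot
    public:
      explicit ParBitMap(std::atomic<uint64_t>* words) : _words(words) { }

      bool is_marked(size_t bit) const {
        uint64_t w = _words[bit >> 6].load(std::memory_order_relaxed);
        return ((w >> (bit & 63)) & 1) != 0;
      }
      void par_mark(size_t bit) {
        // Duplicate marks are harmless, so relaxed ordering suffices here.
        _words[bit >> 6].fetch_or(uint64_t(1) << (bit & 63),
                                  std::memory_order_relaxed);
      }
      void gray_root(size_t bit) {
        if (!is_marked(bit)) {  // cheap test avoids the atomic RMW
          par_mark(bit);
        }
      }
    };
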
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 10 18:58:13 2012 -0500
@@ -36,6 +36,7 @@
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -3018,14 +3019,20 @@
       } else {
         VerifyObjsInRegionClosure not_dead_yet_cl(r, _vo);
         r->object_iterate(&not_dead_yet_cl);
-        if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
-          gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
-                                 "max_live_bytes "SIZE_FORMAT" "
-                                 "< calculated "SIZE_FORMAT,
-                                 r->bottom(), r->end(),
-                                 r->max_live_bytes(),
+        if (_vo != VerifyOption_G1UseNextMarking) {
+          if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+            gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+                                   "max_live_bytes "SIZE_FORMAT" "
+                                   "< calculated "SIZE_FORMAT,
+                                   r->bottom(), r->end(),
+                                   r->max_live_bytes(),
                                  not_dead_yet_cl.live_bytes());
-          _failures = true;
+            _failures = true;
+          }
+        } else {
+          // When vo == UseNextMarking we cannot currently do a sanity
+          // check on the live bytes as the calculation has not been
+          // finalized yet.
         }
       }
     }
@@ -3659,25 +3666,6 @@
         }
         perm_gen()->save_marks();
 
-        // We must do this before any possible evacuation that should propagate
-        // marks.
-        if (mark_in_progress()) {
-          double start_time_sec = os::elapsedTime();
-
-          _cm->drainAllSATBBuffers();
-          double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
-          g1_policy()->record_satb_drain_time(finish_mark_ms);
-        }
-        // Record the number of elements currently on the mark stack, so we
-        // only iterate over these.  (Since evacuation may add to the mark
-        // stack, doing more exposes race conditions.)  If no mark is in
-        // progress, this will be zero.
-        _cm->set_oops_do_bound();
-
-        if (mark_in_progress()) {
-          concurrent_mark()->newCSet();
-        }
-
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:");
         _young_list->print();
@@ -3686,6 +3674,16 @@
 
         g1_policy()->choose_collection_set(target_pause_time_ms);
 
+        _cm->note_start_of_gc();
+        // We should not verify the per-thread SATB buffers given that
+        // we have not filtered them yet (we'll do so during the
+        // GC). We also call this after choose_collection_set() to
+        // ensure that the CSet has been finalized.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 false /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         if (_hr_printer.is_active()) {
           HeapRegion* hr = g1_policy()->collection_set();
           while (hr != NULL) {
@@ -3702,16 +3700,6 @@
           }
         }
 
-        // We have chosen the complete collection set. If marking is
-        // active then, we clear the region fields of any of the
-        // concurrent marking tasks whose region fields point into
-        // the collection set as these values will become stale. This
-        // will cause the owning marking threads to claim a new region
-        // when marking restarts.
-        if (mark_in_progress()) {
-          concurrent_mark()->reset_active_task_region_fields_in_cset();
-        }
-
 #ifdef ASSERT
         VerifyCSetClosure cl;
         collection_set_iterate(&cl);
@@ -3725,6 +3713,16 @@
         // Actually do the work...
         evacuate_collection_set();
 
+        // We do this mainly to verify the per-thread SATB buffers
+        // (which have been filtered by now) since we didn't verify
+        // them earlier. No point in re-checking the stacks / enqueued
+        // buffers given that the CSet has not changed since last time
+        // we checked.
+        _cm->verify_no_cset_oops(false /* verify_stacks */,
+                                 false /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         free_collection_set(g1_policy()->collection_set());
         g1_policy()->clear_collection_set();
 
@@ -3804,6 +3802,14 @@
           }
         }
 
+        // We redo the verification, but now with respect to the new CSet
+        // which has just been initialized after the previous CSet was freed.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+        _cm->note_end_of_gc();
+
         double end_time_sec = os::elapsedTime();
         double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
         g1_policy()->record_pause_time_ms(pause_time_ms);
@@ -3954,6 +3960,8 @@
     // we allocate to in the region sets. We'll re-add it later, when
     // it's retired again.
     _old_set.remove(retained_region);
+    bool during_im = g1_policy()->during_initial_mark_pause();
+    retained_region->note_start_of_copying(during_im);
     _old_gc_alloc_region.set(retained_region);
     _hr_printer.reuse(retained_region);
   }
@@ -4047,8 +4055,7 @@
 
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
-                                               oop old,
-                                               bool should_mark_root) {
+                                               oop old) {
   assert(obj_in_cs(old),
          err_msg("obj: "PTR_FORMAT" should still be in the CSet",
                  (HeapWord*) old));
@@ -4057,15 +4064,6 @@
   if (forward_ptr == NULL) {
     // Forward-to-self succeeded.
 
-    // should_mark_root will be true when this routine is called
-    // from a root scanning closure during an initial mark pause.
-    // In this case the thread that succeeds in self-forwarding the
-    // object is also responsible for marking the object.
-    if (should_mark_root) {
-      assert(!oopDesc::is_null(old), "shouldn't be");
-      _cm->grayRoot(old);
-    }
-
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
       assert(!_drain_in_progress,
@@ -4161,30 +4159,8 @@
   return NULL;
 }
 
-#ifndef PRODUCT
-bool GCLabBitMapClosure::do_bit(size_t offset) {
-  HeapWord* addr = _bitmap->offsetToHeapWord(offset);
-  guarantee(_cm->isMarked(oop(addr)), "it should be!");
-  return true;
-}
-#endif // PRODUCT
-
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
-  ParGCAllocBuffer(gclab_word_size),
-  _should_mark_objects(false),
-  _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
-  _retired(false)
-{
-  //_should_mark_objects is set to true when G1ParCopyHelper needs to
-  // mark the forwarded location of an evacuated object.
-  // We set _should_mark_objects to true if marking is active, i.e. when we
-  // need to propagate a mark, or during an initial mark pause, i.e. when we
-  // need to mark objects immediately reachable by the roots.
-  if (G1CollectedHeap::heap()->mark_in_progress() ||
-      G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
-    _should_mark_objects = true;
-  }
-}
+  ParGCAllocBuffer(gclab_word_size), _retired(false) { }
 
 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
   : _g1h(g1h),
@@ -4198,8 +4174,7 @@
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     _age_table(false),
     _strong_roots_time(0), _term_time(0),
-    _alloc_buffer_waste(0), _undo_waste(0)
-{
+    _alloc_buffer_waste(0), _undo_waste(0) {
   // we allocate G1YoungSurvRateNumRegions plus one entries, since
   // we "sacrifice" entry 0 to keep track of surviving bytes for
   // non-young regions (where the age is -1)
@@ -4304,35 +4279,53 @@
   } while (!refs()->is_empty());
 }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
+                                     G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
-template <class T> void G1ParCopyHelper::mark_object(T* p) {
-  // This is called from do_oop_work for objects that are not
-  // in the collection set. Objects in the collection set
-  // are marked after they have been evacuated.
-
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop(heap_oop);
-    HeapWord* addr = (HeapWord*)obj;
-    if (_g1->is_in_g1_reserved(addr)) {
-      _cm->grayRoot(oop(addr));
-    }
-  }
-}
-
-oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,
-                                                     bool should_mark_copy) {
+void G1ParCopyHelper::mark_object(oop obj) {
+#ifdef ASSERT
+  HeapRegion* hr = _g1->heap_region_containing(obj);
+  assert(hr != NULL, "sanity");
+  assert(!hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // We know that the object is not moving so it's safe to read its size.
+  _cm->grayRoot(obj, (size_t) obj->size());
+}
+
+void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
+#ifdef ASSERT
+  assert(from_obj->is_forwarded(), "from obj should be forwarded");
+  assert(from_obj->forwardee() == to_obj, "to obj should be the forwardee");
+  assert(from_obj != to_obj, "should not be self-forwarded");
+
+  HeapRegion* from_hr = _g1->heap_region_containing(from_obj);
+  assert(from_hr != NULL, "sanity");
+  assert(from_hr->in_collection_set(), "from obj should be in the CSet");
+
+  HeapRegion* to_hr = _g1->heap_region_containing(to_obj);
+  assert(to_hr != NULL, "sanity");
+  assert(!to_hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // The object might be in the process of being copied by another
+  // worker so we cannot trust that its to-space image is
+  // well-formed. So we have to read its size from its from-space
+  // image which we know should not be changing.
+  _cm->grayRoot(to_obj, (size_t) from_obj->size());
+}
+
+oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
   size_t    word_sz = old->size();
   HeapRegion* from_region = _g1->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
   int       young_index = from_region->young_index_in_cset()+1;
-  assert( (from_region->is_young() && young_index > 0) ||
-          (!from_region->is_young() && young_index == 0), "invariant" );
+  assert( (from_region->is_young() && young_index >  0) ||
+         (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
   int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
@@ -4346,7 +4339,7 @@
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
     OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
+    return _g1->handle_evacuation_failure_par(cl, old);
   }
 
   // We're going to allocate linearly, so might as well prefetch ahead.
@@ -4382,23 +4375,6 @@
       obj->set_mark(m);
     }
 
-    // Mark the evacuated object or propagate "next" mark bit
-    if (should_mark_copy) {
-      if (!use_local_bitmaps ||
-          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
-        // if we couldn't mark it on the local bitmap (this happens when
-        // the object was not allocated in the GCLab), we have to bite
-        // the bullet and do the standard parallel mark
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-
-      if (_g1->isMarkedNext(old)) {
-        // Unmark the object's old location so that marking
-        // doesn't think the old object is alive.
-        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
-      }
-    }
-
     size_t* surv_young_words = _par_scan_state->surviving_young_words();
     surv_young_words[young_index] += word_sz;
 
@@ -4428,61 +4404,24 @@
 ::do_oop_work(T* p) {
   oop obj = oopDesc::load_decode_heap_oop(p);
   assert(barrier != G1BarrierRS || obj != NULL,
-         "Precondition: G1BarrierRS implies obj is nonNull");
-
-  // Marking:
-  // If the object is in the collection set, then the thread
-  // that copies the object should mark, or propagate the
-  // mark to, the evacuated object.
-  // If the object is not in the collection set then we
-  // should call the mark_object() method depending on the
-  // value of the template parameter do_mark_object (which will
-  // be true for root scanning closures during an initial mark
-  // pause).
-  // The mark_object() method first checks whether the object
-  // is marked and, if not, attempts to mark the object.
+         "Precondition: G1BarrierRS implies obj is non-NULL");
 
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
+    oop forwardee;
     if (obj->is_forwarded()) {
-      oopDesc::encode_store_heap_oop(p, obj->forwardee());
-      // If we are a root scanning closure during an initial
-      // mark pause (i.e. do_mark_object will be true) then
-      // we also need to handle marking of roots in the
-      // event of an evacuation failure. In the event of an
-      // evacuation failure, the object is forwarded to itself
-      // and not copied. For root-scanning closures, the
-      // object would be marked after a successful self-forward
-      // but an object could be pointed to by both a root and non
-      // root location and be self-forwarded by a non-root-scanning
-      // closure. Therefore we also have to attempt to mark the
-      // self-forwarded root object here.
-      if (do_mark_object && obj->forwardee() == obj) {
-        mark_object(p);
-      }
+      forwardee = obj->forwardee();
     } else {
-      // During an initial mark pause, objects that are pointed to
-      // by the roots need to be marked - even in the event of an
-      // evacuation failure. We pass the template parameter
-      // do_mark_object (which is true for root scanning closures
-      // during an initial mark pause) to copy_to_survivor_space
-      // which will pass it on to the evacuation failure handling
-      // code. The thread that successfully self-forwards a root
-      // object to itself is responsible for marking the object.
-      bool should_mark_root = do_mark_object;
-
-      // We need to mark the copied object if we're a root scanning
-      // closure during an initial mark pause (i.e. do_mark_object
-      // will be true), or the object is already marked and we need
-      // to propagate the mark to the evacuated copy.
-      bool should_mark_copy = do_mark_object ||
-                              _during_initial_mark ||
-                              (_mark_in_progress && !_g1->is_obj_ill(obj));
-
-      oop copy_oop = copy_to_survivor_space(obj, should_mark_root,
-                                                 should_mark_copy);
-      oopDesc::encode_store_heap_oop(p, copy_oop);
+      forwardee = copy_to_survivor_space(obj);
     }
+    assert(forwardee != NULL, "forwardee should not be NULL");
+    oopDesc::encode_store_heap_oop(p, forwardee);
+    if (do_mark_object && forwardee != obj) {
+      // If the object is self-forwarded we don't need to explicitly
+      // mark it, the evacuation failure protocol will do so.
+      mark_forwarded_object(obj, forwardee);
+    }
+
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
       _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
@@ -4491,8 +4430,8 @@
     // The object is not in collection set. If we're a root scanning
     // closure during an initial mark pause (i.e. do_mark_object will
     // be true) then attempt to mark the object.
-    if (do_mark_object) {
-      mark_object(p);
+    if (do_mark_object && _g1->is_in_g1_reserved(obj)) {
+      mark_object(obj);
     }
   }
 
@@ -4787,12 +4726,16 @@
 
   g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
 
-  // Scan strong roots in mark stack.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
-    concurrent_mark()->oops_do(scan_non_heap_roots);
-  }
-  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+  // During conc marking we have to filter the per-thread SATB buffers
+  // to make sure we remove any oops into the CSet (which will show up
+  // as implicitly live).
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
+    if (mark_in_progress()) {
+      JavaThread::satb_mark_queue_set().filter_thread_buffers();
+    }
+  }
+  double satb_filtering_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
@@ -5411,13 +5354,6 @@
 
   finalize_for_evac_failure();
 
-  // Must do this before clearing the per-region evac-failure flags
-  // (which is currently done when we free the collection set).
-  // We also only do this if marking is actually in progress and so
-  // have to do this before we set the mark_in_progress flag at the
-  // end of an initial mark pause.
-  concurrent_mark()->complete_marking_in_collection_set();
-
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
     if (PrintGCDetails) {
@@ -6074,6 +6010,8 @@
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
       }
+      bool during_im = g1_policy()->during_initial_mark_pause();
+      new_alloc_region->note_start_of_copying(during_im);
       return new_alloc_region;
     } else {
       g1_policy()->note_alloc_region_limit_reached(ap);
@@ -6085,7 +6023,8 @@
 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
                                              GCAllocPurpose ap) {
-  alloc_region->note_end_of_copying();
+  bool during_im = g1_policy()->during_initial_mark_pause();
+  alloc_region->note_end_of_copying(during_im);
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
   if (ap == GCAllocForSurvived) {
     young_list()->add_survivor_region(alloc_region);
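
In the root-scanning hunk above, the old "Mark Stack Scanning" timing is replaced by SATB filtering time, measured around the claimed task and recorded per worker. A sketch of that per-worker timing pattern under assumed names (HotSpot itself uses os::elapsedTime() and the policy's record_satb_filtering_time()):

    #include <chrono>
    #include <vector>

    // Hypothetical per-worker phase timer: each GC worker measures the
    // wall time of its share of a phase and stores it in its own slot;
    // the policy later averages the slots for the log output.
    struct PhaseTimes {
      std::vector<double> satb_filtering_ms;
      explicit PhaseTimes(int workers)
        : satb_filtering_ms(workers, -1234.0) { }  // -1234.0 = unrecorded
      void record_satb_filtering_time(int worker, double ms) {
        satb_filtering_ms[worker] = ms;
      }
    };

    template <typename Phase>
    void timed_satb_filtering(PhaseTimes& times, int worker, Phase phase) {
      auto start = std::chrono::steady_clock::now();
      phase();  // e.g. filter this worker's share of the SATB buffers
      std::chrono::duration<double, std::milli> elapsed =
          std::chrono::steady_clock::now() - start;
      times.record_satb_filtering_time(worker, elapsed.count());
    }
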
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -869,8 +869,7 @@
   void finalize_for_evac_failure();
 
   // An attempt to evacuate "obj" has failed; take necessary steps.
-  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj,
-                                    bool should_mark_root);
+  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
   void handle_evacuation_failure_common(oop obj, markOop m);
 
   // ("Weak") Reference processing support.
@@ -962,7 +961,7 @@
   unsigned int* _worker_cset_start_region_time_stamp;
 
   enum G1H_process_strong_roots_tasks {
-    G1H_PS_mark_stack_oops_do,
+    G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
     G1H_PS_NumElements
@@ -1752,10 +1751,8 @@
       _gclab_word_size(gclab_word_size),
       _real_start_word(NULL),
       _real_end_word(NULL),
-      _start_word(NULL)
-  {
-    guarantee( size_in_words() >= bitmap_size_in_words(),
-               "just making sure");
+      _start_word(NULL) {
+    guarantee(false, "GCLabBitMap::GCLabBitmap(): don't call this any more");
   }
 
   inline unsigned heapWordToOffset(HeapWord* addr) {
@@ -1809,6 +1806,8 @@
   }
 
   void set_buffer(HeapWord* start) {
+    guarantee(false, "set_buffer(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     clear();
 
@@ -1832,6 +1831,8 @@
 #endif // PRODUCT
 
   void retire() {
+    guarantee(false, "retire(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     assert(fields_well_formed(), "invariant");
 
@@ -1865,32 +1866,18 @@
 class G1ParGCAllocBuffer: public ParGCAllocBuffer {
 private:
   bool        _retired;
-  bool        _should_mark_objects;
-  GCLabBitMap _bitmap;
 
 public:
   G1ParGCAllocBuffer(size_t gclab_word_size);
 
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(_should_mark_objects, "invariant");
-    return _bitmap.mark(addr);
-  }
-
-  inline void set_buf(HeapWord* buf) {
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.set_buffer(buf);
-    }
+  void set_buf(HeapWord* buf) {
     ParGCAllocBuffer::set_buf(buf);
     _retired = false;
   }
 
-  inline void retire(bool end_of_gc, bool retain) {
+  void retire(bool end_of_gc, bool retain) {
     if (_retired)
       return;
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.retire();
-    }
     ParGCAllocBuffer::retire(end_of_gc, retain);
     _retired = true;
   }
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -281,7 +281,7 @@
 
   _par_last_gc_worker_start_times_ms = new double[_parallel_gc_threads];
   _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
-  _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_satb_filtering_times_ms = new double[_parallel_gc_threads];
 
   _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
   _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
@@ -905,10 +905,19 @@
     gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
-  // We only need to do this here as the policy will only be applied
-  // to the GC we're about to start. so, no point is calculating this
-  // every time we calculate / recalculate the target young length.
-  update_survivors_policy();
+  if (!during_initial_mark_pause()) {
+    // We only need to do this here as the policy will only be applied
+    // to the GC we're about to start. So there is no point in calculating
+    // this every time we calculate / recalculate the target young length.
+    update_survivors_policy();
+  } else {
+    // The marking phase has a "we only copy implicitly live
+    // objects during marking" invariant. The easiest way to ensure it
+    // holds is not to allocate any survivor regions and tenure all
+    // objects. In the future we might change this and handle survivor
+    // regions specially during marking.
+    tenure_all_objects();
+  }
 
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
@@ -939,7 +948,7 @@
   for (int i = 0; i < _parallel_gc_threads; ++i) {
     _par_last_gc_worker_start_times_ms[i] = -1234.0;
     _par_last_ext_root_scan_times_ms[i] = -1234.0;
-    _par_last_mark_stack_scan_times_ms[i] = -1234.0;
+    _par_last_satb_filtering_times_ms[i] = -1234.0;
     _par_last_update_rs_times_ms[i] = -1234.0;
     _par_last_update_rs_processed_buffers[i] = -1234.0;
     _par_last_scan_rs_times_ms[i] = -1234.0;
@@ -1227,7 +1236,7 @@
   // of the PrintGCDetails output, in the non-parallel case.
 
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
   double update_rs_time = avg_value(_par_last_update_rs_times_ms);
   double update_rs_processed_buffers =
     sum_of_values(_par_last_update_rs_processed_buffers);
@@ -1236,7 +1245,7 @@
   double termination_time = avg_value(_par_last_termination_times_ms);
 
   double known_time = ext_root_scan_time +
-                      mark_stack_scan_time +
+                      satb_filtering_time +
                       update_rs_time +
                       scan_rs_time +
                       obj_copy_time;
@@ -1282,7 +1291,7 @@
     body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
 
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
-    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_satb_filtering_time_ms(satb_filtering_time);
     body_summary->record_update_rs_time_ms(update_rs_time);
     body_summary->record_scan_rs_time_ms(scan_rs_time);
     body_summary->record_obj_copy_time_ms(obj_copy_time);
@@ -1376,16 +1385,12 @@
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (print_marking_info) {
-      print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-    }
-
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
       print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
       if (print_marking_info) {
-        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+        print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
       }
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
       print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
@@ -1399,7 +1404,7 @@
         _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
 
         double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
-                                   _par_last_mark_stack_scan_times_ms[i] +
+                                   _par_last_satb_filtering_times_ms[i] +
                                    _par_last_update_rs_times_ms[i] +
                                    _par_last_scan_rs_times_ms[i] +
                                    _par_last_obj_copy_times_ms[i] +
@@ -1412,7 +1417,7 @@
     } else {
       print_stats(1, "Ext Root Scanning", ext_root_scan_time);
       if (print_marking_info) {
-        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+        print_stats(1, "SATB Filtering", satb_filtering_time);
       }
       print_stats(1, "Update RS", update_rs_time);
       print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
@@ -1983,11 +1988,10 @@
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
-      print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
       if (parallel) {
         print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
         print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(2, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(2, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(2, "Update RS", body_summary->get_update_rs_seq());
         print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
@@ -1996,7 +2000,7 @@
         {
           NumberSeq* other_parts[] = {
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq(),
@@ -2009,7 +2013,7 @@
         }
       } else {
         print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(1, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(1, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(1, "Update RS", body_summary->get_update_rs_seq());
         print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
@@ -2036,7 +2040,7 @@
             body_summary->get_satb_drain_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq()
           };
@@ -2433,9 +2437,6 @@
   assert(_inc_cset_build_state == Active, "Precondition");
   assert(!hr->is_young(), "non-incremental add of young region");
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->registerCSetRegion(hr);
-
   assert(!hr->in_collection_set(), "should not already be in the CSet");
   hr->set_in_collection_set(true);
   hr->set_next_in_collection_set(_collection_set);
@@ -2705,9 +2706,6 @@
   // Clear the fields that point to the survivor list - they are all young now.
   young_list->clear_survivors();
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
-
   _collection_set = _inc_cset_head;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,7 +67,7 @@
   define_num_seq(satb_drain) // optional
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
-    define_num_seq(mark_stack_scan)
+    define_num_seq(satb_filtering)
     define_num_seq(update_rs)
     define_num_seq(scan_rs)
     define_num_seq(obj_copy)
@@ -215,7 +215,7 @@
 
   double* _par_last_gc_worker_start_times_ms;
   double* _par_last_ext_root_scan_times_ms;
-  double* _par_last_mark_stack_scan_times_ms;
+  double* _par_last_satb_filtering_times_ms;
   double* _par_last_update_rs_times_ms;
   double* _par_last_update_rs_processed_buffers;
   double* _par_last_scan_rs_times_ms;
@@ -841,8 +841,8 @@
     _par_last_ext_root_scan_times_ms[worker_i] = ms;
   }
 
-  void record_mark_stack_scan_time(int worker_i, double ms) {
-    _par_last_mark_stack_scan_times_ms[worker_i] = ms;
+  void record_satb_filtering_time(int worker_i, double ms) {
+    _par_last_satb_filtering_times_ms[worker_i] = ms;
   }
 
   void record_satb_drain_time(double ms) {
@@ -1146,6 +1146,11 @@
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
+  void tenure_all_objects() {
+    _max_survivor_regions = 0;
+    _tenuring_threshold = 0;
+  }
+
   void record_survivor_regions(size_t      regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
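
tenure_all_objects() above zeroes both the survivor-region budget and the tenuring threshold; since every object's age is >= 0, nothing ever qualifies for survivor space during an initial-mark pause, which preserves the "no survivors during initial-mark" invariant. A toy illustration of that policy switch (hypothetical names, not the HotSpot policy class):

    // Hypothetical promotion decision: age < threshold goes to survivor
    // space, otherwise the object is tenured. A threshold of 0 tenures
    // everything, since ages are never negative.
    enum class Destination { Survivor, Tenured };

    struct PolicySketch {
      unsigned max_survivor_regions;
      unsigned tenuring_threshold;

      PolicySketch() : max_survivor_regions(8), tenuring_threshold(15) { }

      void tenure_all_objects() {
        max_survivor_regions = 0;
        tenuring_threshold = 0;
      }
      Destination destination_for(unsigned age) const {
        return age < tenuring_threshold ? Destination::Survivor
                                        : Destination::Tenured;
      }
    };
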
--- a/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -66,19 +66,22 @@
   G1CollectedHeap* _g1;
   ConcurrentMark* _cm;
   HeapRegion* _hr;
-  size_t _prev_marked_bytes;
-  size_t _next_marked_bytes;
+  size_t _marked_bytes;
   OopsInHeapRegionClosure *_update_rset_cl;
+  bool _during_initial_mark;
+  bool _during_conc_mark;
 public:
   RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                                  HeapRegion* hr,
-                                 OopsInHeapRegionClosure* update_rset_cl) :
-    _g1(g1), _cm(cm), _hr(hr),
+                                 OopsInHeapRegionClosure* update_rset_cl,
+                                 bool during_initial_mark,
+                                 bool during_conc_mark) :
+    _g1(g1), _cm(cm), _hr(hr), _marked_bytes(0),
     _update_rset_cl(update_rset_cl),
-    _prev_marked_bytes(0), _next_marked_bytes(0) {}
+    _during_initial_mark(during_initial_mark),
+    _during_conc_mark(during_conc_mark) { }
 
-  size_t prev_marked_bytes() { return _prev_marked_bytes; }
-  size_t next_marked_bytes() { return _next_marked_bytes; }
+  size_t marked_bytes() { return _marked_bytes; }
 
   // <original comment>
   // The original idea here was to coalesce evacuated and dead objects.
@@ -100,18 +103,29 @@
     HeapWord* obj_addr = (HeapWord*) obj;
     assert(_hr->is_in(obj_addr), "sanity");
     size_t obj_size = obj->size();
-
     _hr->update_bot_for_object(obj_addr, obj_size);
 
     if (obj->is_forwarded() && obj->forwardee() == obj) {
       // The object failed to move.
-      assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
+
+      // We consider all objects that we find self-forwarded to be
+      // live. We'll update the prev marking info so that they are
+      // all under PTAMS and explicitly marked.
       _cm->markPrev(obj);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-      _prev_marked_bytes += (obj_size * HeapWordSize);
-      if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
-        _cm->markAndGrayObjectIfNecessary(obj);
+      if (_during_initial_mark) {
+        // For the next marking info we only mark the self-forwarded
+        // objects explicitly during initial-mark (since, normally,
+        // we only mark objects pointed to by roots if we succeed in
+        // copying them). By marking all self-forwarded objects we
+        // ensure that we mark any that are still pointed to by
+        // roots. During concurrent marking, and after initial-mark,
+        // we don't need to mark any objects explicitly: all objects
+        // in the CSet are considered (implicitly) live. So, we won't
+        // mark the self-forwarded objects explicitly and we'll leave
+        // them above NTAMS.
+        _cm->markNext(obj);
       }
+      _marked_bytes += (obj_size * HeapWordSize);
       obj->set_mark(markOopDesc::prototype());
 
       // While we were processing RSet buffers during the collection,
@@ -126,15 +140,13 @@
       // The problem is that, if evacuation fails, we might have
       // remembered set entries missing given that we skipped cards on
       // the collection set. So, we'll recreate such entries now.
-
       obj->oop_iterate(_update_rset_cl);
       assert(_cm->isPrevMarked(obj), "Should be marked!");
     } else {
       // The object has been either evacuated or is dead. Fill it with a
       // dummy object.
-      MemRegion mr((HeapWord*)obj, obj_size);
+      MemRegion mr((HeapWord*) obj, obj_size);
       CollectedHeap::fill_with_object(mr);
-      _cm->clearRangeBothMaps(mr);
     }
   }
 };
@@ -151,12 +163,27 @@
     _cm(_g1h->concurrent_mark()) { }
 
   bool doHeapRegion(HeapRegion *hr) {
+    bool during_initial_mark = _g1h->g1_policy()->during_initial_mark_pause();
+    bool during_conc_mark = _g1h->mark_in_progress();
+
     assert(!hr->isHumongous(), "sanity");
     assert(hr->in_collection_set(), "bad CS");
 
     if (hr->claimHeapRegion(HeapRegion::ParEvacFailureClaimValue)) {
       if (hr->evacuation_failed()) {
-        RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl);
+        RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl,
+                                            during_initial_mark,
+                                            during_conc_mark);
+
+        MemRegion mr(hr->bottom(), hr->end());
+        // We'll recreate the prev marking info so we'll first clear
+        // the prev bitmap range for this region. We never mark any
+        // CSet objects explicitly so the next bitmap range should be
+        // cleared anyway.
+        _cm->clearRangePrevBitmap(mr);
+
+        hr->note_self_forwarding_removal_start(during_initial_mark,
+                                               during_conc_mark);
 
         // In the common case (i.e. when there is no evacuation
         // failure) we make sure that the following is done when
@@ -171,28 +198,9 @@
         _update_rset_cl->set_region(hr);
         hr->object_iterate(&rspc);
 
-        // A number of manipulations to make the TAMS for this region
-        // be the current top, and the marked bytes be the ones observed
-        // in the iteration.
-        if (_cm->at_least_one_mark_complete()) {
-          // The comments below are the postconditions achieved by the
-          // calls.  Note especially the last such condition, which says that
-          // the count of marked bytes has been properly restored.
-          hr->note_start_of_marking(false);
-          // _next_top_at_mark_start == top, _next_marked_bytes == 0
-          hr->add_to_marked_bytes(rspc.prev_marked_bytes());
-          // _next_marked_bytes == prev_marked_bytes.
-          hr->note_end_of_marking();
-          // _prev_top_at_mark_start == top(),
-          // _prev_marked_bytes == prev_marked_bytes
-        }
-        // If there is no mark in progress, we modified the _next variables
-        // above needlessly, but harmlessly.
-        if (_g1h->mark_in_progress()) {
-          hr->note_start_of_marking(false);
-          // _next_top_at_mark_start == top, _next_marked_bytes == 0
-          // _next_marked_bytes == next_marked_bytes.
-        }
+        hr->note_self_forwarding_removal_end(during_initial_mark,
+                                             during_conc_mark,
+                                             rspc.marked_bytes());
       }
     }
     return false;
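
The marking decision the closure above makes per self-forwarded object comes down to a few lines. A standalone sketch, where std::bitset stands in for the prev/next marking bitmaps and word offsets stand in for oops (nothing here is HotSpot code):

#include <bitset>
#include <cstddef>

static const size_t kRegionWords = 1024;
typedef std::bitset<kRegionWords> ToyBitmap;  // stand-in for a CM bitmap

// Sketch of the per-object logic in RemoveSelfForwardPtrObjClosure for
// an evacuation-failed (self-forwarded) object at word offset 'obj'.
void handle_self_forwarded(size_t obj,
                           ToyBitmap& prev_bitmap,
                           ToyBitmap& next_bitmap,
                           bool during_initial_mark) {
  // The object is considered live: it always gets a prev-bitmap mark,
  // since the prev marking info for the region is being recreated and
  // PTAMS has been moved up to top.
  prev_bitmap.set(obj);

  if (during_initial_mark) {
    // Only initial-mark adds an explicit next-bitmap mark: roots are
    // normally marked via their copied-to location, and this object
    // failed to move.
    next_bitmap.set(obj);
  }
  // Otherwise (during concurrent mark) the object stays unmarked on
  // the next bitmap: NTAMS was set to bottom, so the object is
  // implicitly live by being above NTAMS.
}

int main() {
  ToyBitmap prev, next;
  handle_self_forwarded(42, prev, next, true /* during_initial_mark */);
  return (prev.test(42) && next.test(42)) ? 0 : 1;
}
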
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,17 +121,25 @@
 class G1ParCopyHelper : public G1ParClosureSuper {
   G1ParScanClosure *_scanner;
 protected:
-  template <class T> void mark_object(T* p);
-  oop copy_to_survivor_space(oop obj, bool should_mark_root,
-                                      bool should_mark_copy);
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that are guaranteed not to move
+  // during the GC (i.e., non-CSet objects). It is MT-safe.
+  void mark_object(oop obj);
+
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that have been forwarded during a
+  // GC. It is MT-safe.
+  void mark_forwarded_object(oop from_obj, oop to_obj);
+
+  oop copy_to_survivor_space(oop obj);
+
 public:
   G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                   G1ParScanClosure *scanner) :
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
 
@@ -140,9 +148,8 @@
 public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                    ReferenceProcessor* rp) :
-    _scanner(g1, par_scan_state, rp),
-    G1ParCopyHelper(g1, par_scan_state, &_scanner)
-  {
+      _scanner(g1, par_scan_state, rp),
+      G1ParCopyHelper(g1, par_scan_state, &_scanner) {
     assert(_ref_processor == NULL, "sanity");
   }
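
A toy model of the split introduced above between mark_object() and mark_forwarded_object(), with integer ids standing in for oops; the set-based "bitmap" and CSet test are made up for illustration:

#include <cassert>
#include <unordered_set>

struct ToyMarker {
  std::unordered_set<long> next_marks;  // stand-in for the next bitmap
  std::unordered_set<long> cset;        // stand-in for the CSet test

  // Sketch of mark_object(): for root referents guaranteed not to
  // move, i.e. objects outside the CSet.
  void mark_object(long obj) {
    assert(cset.count(obj) == 0);
    next_marks.insert(obj);  // idempotent, hence MT-safe in the real
                             // code via an atomic bitmap update
  }

  // Sketch of mark_forwarded_object(): for root referents that were
  // copied during the GC; the mark goes on the new location.
  void mark_forwarded_object(long from_obj, long to_obj) {
    assert(cset.count(from_obj) == 1);  // only CSet objects move
    assert(from_obj != to_obj);         // self-forwarded objects take
                                        // the evac-failure path instead
    next_marks.insert(to_obj);
  }
};

int main() {
  ToyMarker m;
  m.cset.insert(100);
  m.mark_object(7);                   // non-CSet root referent
  m.mark_forwarded_object(100, 200);  // copied root referent
  return 0;
}
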
 
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -575,6 +575,40 @@
   oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
 }
 
+void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
+                                                    bool during_conc_mark) {
+  // We always recreate the prev marking info and we'll explicitly
+  // mark all objects we find to be self-forwarded on the prev
+  // bitmap. So all objects need to be below PTAMS.
+  _prev_top_at_mark_start = top();
+  _prev_marked_bytes = 0;
+
+  if (during_initial_mark) {
+    // During initial-mark, we'll also explicitly mark all objects
+    // we find to be self-forwarded on the next bitmap. So all
+    // objects need to be below NTAMS.
+    _next_top_at_mark_start = top();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  } else if (during_conc_mark) {
+    // During concurrent mark, all objects in the CSet (including
+    // the ones we find to be self-forwarded) are implicitly live.
+    // So all objects need to be above NTAMS.
+    _next_top_at_mark_start = bottom();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  }
+}
+
+void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,
+                                                  bool during_conc_mark,
+                                                  size_t marked_bytes) {
+  assert(0 <= marked_bytes && marked_bytes <= used(),
+         err_msg("marked: "SIZE_FORMAT" used: "SIZE_FORMAT,
+                 marked_bytes, used()));
+  _prev_marked_bytes = marked_bytes;
+}
+
 HeapWord*
 HeapRegion::object_iterate_mem_careful(MemRegion mr,
                                                  ObjectClosure* cl) {
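
The two note_self_forwarding_removal_*() calls above bracket the region walk. A compact sketch of what they do to the TAMS fields, using word offsets from bottom (ToyRegion is illustrative only):

#include <cassert>
#include <cstddef>

struct ToyRegion {
  size_t top;           // allocation frontier, words from bottom
  size_t ptams, ntams;  // prev/next top-at-mark-start
  size_t prev_marked_bytes, next_marked_bytes;

  // Sketch of note_self_forwarding_removal_start(): the prev marking
  // info is recreated, so every object must end up below PTAMS.
  void removal_start(bool during_initial_mark, bool during_conc_mark) {
    ptams = top;
    prev_marked_bytes = 0;
    if (during_initial_mark) {
      ntams = top;            // explicit next marks must be below NTAMS
      next_marked_bytes = 0;
    } else if (during_conc_mark) {
      ntams = 0;              // bottom: everything is implicitly live
      next_marked_bytes = 0;  // by sitting above NTAMS
    }
  }

  // Sketch of note_self_forwarding_removal_end(): install the byte
  // count the object walk observed.
  void removal_end(size_t marked_bytes, size_t used_bytes) {
    assert(marked_bytes <= used_bytes);
    prev_marked_bytes = marked_bytes;
  }
};

int main() {
  ToyRegion r = { 256, 0, 0, 0, 0 };
  r.removal_start(true /* initial-mark */, false);
  r.removal_end(128 * sizeof(void*), 256 * sizeof(void*));
  return (r.ptams == 256 && r.ntams == 256) ? 0 : 1;
}
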
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -583,37 +583,33 @@
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
 
-  // Note the start of a marking phase. Record the
-  // start of the unmarked area of the region here.
-  void note_start_of_marking(bool during_initial_mark) {
-    init_top_at_conc_mark_count();
-    _next_marked_bytes = 0;
-    if (during_initial_mark && is_young() && !is_survivor())
-      _next_top_at_mark_start = bottom();
-    else
-      _next_top_at_mark_start = top();
-  }
+  // Notify the region that concurrent marking is starting. Initialize
+  // all fields related to the next marking info.
+  inline void note_start_of_marking();
+
+  // Notify the region that concurrent marking has finished. Copy the
+  // (now finalized) next marking info fields into the prev marking
+  // info fields.
+  inline void note_end_of_marking();
+
+  // Notify the region that it will be used as to-space during a GC
+  // and we are about to start copying objects into it.
+  inline void note_start_of_copying(bool during_initial_mark);
 
-  // Note the end of a marking phase. Install the start of
-  // the unmarked area that was captured at start of marking.
-  void note_end_of_marking() {
-    _prev_top_at_mark_start = _next_top_at_mark_start;
-    _prev_marked_bytes = _next_marked_bytes;
-    _next_marked_bytes = 0;
+  // Notify the region that it ceases being to-space during a GC and
+  // we will not copy objects into it any more.
+  inline void note_end_of_copying(bool during_initial_mark);
 
-    guarantee(_prev_marked_bytes <=
-              (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
-              "invariant");
-  }
+  // Notify the region that we are about to start processing
+  // self-forwarded objects during evac failure handling.
+  void note_self_forwarding_removal_start(bool during_initial_mark,
+                                          bool during_conc_mark);
 
-  // After an evacuation, we need to update _next_top_at_mark_start
-  // to be the current top.  Note this is only valid if we have only
-  // ever evacuated into this region.  If we evacuate, allocate, and
-  // then evacuate we are in deep doodoo.
-  void note_end_of_copying() {
-    assert(top() >= _next_top_at_mark_start, "Increase only");
-    _next_top_at_mark_start = top();
-  }
+  // Notify the region that we have finished processing self-forwarded
+  // objects during evac failure handling.
+  void note_self_forwarding_removal_end(bool during_initial_mark,
+                                        bool during_conc_mark,
+                                        size_t marked_bytes);
 
   // Returns "false" iff no object in the region was allocated when the
   // last mark phase ended.
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,4 +55,71 @@
   return _offsets.block_start_const(p);
 }
 
+inline void HeapRegion::note_start_of_marking() {
+  init_top_at_conc_mark_count();
+  _next_marked_bytes = 0;
+  _next_top_at_mark_start = top();
+}
+
+inline void HeapRegion::note_end_of_marking() {
+  _prev_top_at_mark_start = _next_top_at_mark_start;
+  _prev_marked_bytes = _next_marked_bytes;
+  _next_marked_bytes = 0;
+
+  assert(_prev_marked_bytes <=
+         (size_t) pointer_delta(prev_top_at_mark_start(), bottom()) *
+         HeapWordSize, "invariant");
+}
+
+inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // During initial-mark we'll explicitly mark any objects on old
+      // regions that are pointed to by roots. Explicit marks only
+      // make sense under NTAMS, but since we don't know where the
+      // top of this region will end up, we cannot check that here.
+      // Instead we simply set NTAMS to the end of the region so
+      // that all marks will be below NTAMS. We'll set NTAMS to the
+      // actual top when we retire this region.
+      _next_top_at_mark_start = end();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // We could have re-used this old region as to-space over a
+      // couple of GCs since the start of the concurrent marking
+      // cycle. This means that [bottom,NTAMS) will contain objects
+      // copied up to and including initial-mark and [NTAMS, top)
+      // will contain objects copied during the concurrent marking cycle.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
+inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(_next_top_at_mark_start == end(), "pre-condition");
+      _next_top_at_mark_start = top();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
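
For old regions used as to-space during initial-mark, the two copying notifications above implement a simple "park NTAMS at end, snap it to top at retirement" protocol. A standalone sketch of that protocol under toy assumptions (word offsets, made-up type name):

#include <cassert>
#include <cstddef>

struct ToyToSpaceRegion {
  size_t end_words;  // fixed region size in words
  size_t top;        // allocation frontier
  size_t ntams;      // next top-at-mark-start

  // Sketch of note_start_of_copying(true) for a non-survivor region:
  // top's final value is unknown, so NTAMS is parked at end; every
  // explicit mark placed meanwhile is guaranteed to be below it.
  void start_of_copying_initial_mark() {
    ntams = end_words;
  }

  // Sketch of note_end_of_copying(true): the region is retired, top
  // is final, and NTAMS snaps down to it.
  void end_of_copying_initial_mark() {
    assert(ntams == end_words);  // matches the pre-condition above
    ntams = top;
  }
};

int main() {
  ToyToSpaceRegion r = { 512, 0, 0 };
  r.start_of_copying_initial_mark();
  r.top = 300;  // objects were copied in
  r.end_of_copying_initial_mark();
  return (r.ntams == 300) ? 0 : 1;
}
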
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +70,7 @@
   // given PtrQueueSet.
   PtrQueue(PtrQueueSet* qset, bool perm = false, bool active = false);
   // Release any contained resources.
-  void flush();
+  virtual void flush();
   // Calls flush() when destroyed.
   ~PtrQueue() { flush(); }
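
One C++ subtlety worth keeping in mind with flush() now virtual: a virtual call made from a destructor body dispatches only as far as the class currently being destroyed. A standalone reminder (not HotSpot code):

#include <cstdio>

struct Base {
  virtual void flush() { puts("Base::flush"); }
  ~Base() { flush(); }  // the derived part is already gone here, so
                        // this always resolves to Base::flush
};

struct Derived : Base {
  virtual void flush() { puts("Derived::flush"); }
};

int main() {
  Derived d;
  return 0;  // destruction prints "Base::flush", not "Derived::flush"
}

So a derived override like ObjPtrQueue::flush() in satbQueue.cpp below is only reached through calls made while the object is fully alive, not through the call in ~PtrQueue().
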
 
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,14 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 
+void ObjPtrQueue::flush() {
+  // The buffer might contain refs into the CSet. We have to filter
+  // it before we flush it, otherwise we might end up with an
+  // enqueued buffer with refs into the CSet, breaking our invariants.
+  filter();
+  PtrQueue::flush();
+}
+
 // This method removes entries from an SATB buffer that will not be
 // useful to the concurrent marking threads. An entry is removed if it
 // satisfies one of the following conditions:
@@ -44,38 +52,27 @@
 //     process it again).
 //
 // The rest of the entries will be retained and are compacted towards
-// the top of the buffer. If with this filtering we clear a large
-// enough chunk of the buffer we can re-use it (instead of enqueueing
-// it) and we can just allow the mutator to carry on executing.
-
-bool ObjPtrQueue::should_enqueue_buffer() {
-  assert(_lock == NULL || _lock->owned_by_self(),
-         "we should have taken the lock before calling this");
+// the top of the buffer. Note that, because we do not allow old
+// regions in the CSet during marking, all objects on the CSet regions
+// are young (eden or survivors) and therefore implicitly live. So any
+// references into the CSet will be removed during filtering.
 
-  // A value of 0 means "don't filter SATB buffers".
-  if (G1SATBBufferEnqueueingThresholdPercent == 0) {
-    return true;
-  }
-
+void ObjPtrQueue::filter() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // This method should only be called if there is a non-NULL buffer
-  // that is full.
-  assert(_index == 0, "pre-condition");
-  assert(_buf != NULL, "pre-condition");
-
   void** buf = _buf;
   size_t sz = _sz;
 
+  if (buf == NULL) {
+    // nothing to do
+    return;
+  }
+
   // Used for sanity checking at the end of the loop.
   debug_only(size_t entries = 0; size_t retained = 0;)
 
   size_t i = sz;
   size_t new_index = sz;
 
-  // Given that we are expecting _index == 0, we could have changed
-  // the loop condition to (i > 0). But we are using _index for
-  // generality.
   while (i > _index) {
     assert(i > 0, "we should have at least one more entry to process");
     i -= oopSize;
@@ -103,22 +100,58 @@
       debug_only(retained += 1;)
     }
   }
+
+#ifdef ASSERT
   size_t entries_calc = (sz - _index) / oopSize;
   assert(entries == entries_calc, "the number of entries we counted "
          "should match the number of entries we calculated");
   size_t retained_calc = (sz - new_index) / oopSize;
   assert(retained == retained_calc, "the number of retained entries we counted "
          "should match the number of retained entries we calculated");
-  size_t perc = retained_calc * 100 / entries_calc;
+#endif // ASSERT
+
+  _index = new_index;
+}
+
+// This method first applies the above filtering to the buffer. If,
+// after filtering, a large enough chunk of the buffer has been
+// cleared, we can re-use the buffer (instead of enqueueing it) and
+// let the mutator carry on executing with the same buffer rather
+// than allocating a replacement.
+
+bool ObjPtrQueue::should_enqueue_buffer() {
+  assert(_lock == NULL || _lock->owned_by_self(),
+         "we should have taken the lock before calling this");
+
+  // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to
+  // filter the buffer: filtering removes any references into the
+  // CSet, and we currently assume that no such refs will appear in
+  // enqueued buffers.
+
+  // This method should only be called if there is a non-NULL buffer
+  // that is full.
+  assert(_index == 0, "pre-condition");
+  assert(_buf != NULL, "pre-condition");
+
+  filter();
+
+  size_t sz = _sz;
+  size_t all_entries = sz / oopSize;
+  size_t retained_entries = (sz - _index) / oopSize;
+  size_t perc = retained_entries * 100 / all_entries;
   bool should_enqueue = perc > (size_t) G1SATBBufferEnqueueingThresholdPercent;
-  _index = new_index;
-
   return should_enqueue;
 }
 
 void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
   if (_buf != NULL) {
     apply_closure_to_buffer(cl, _buf, _index, _sz);
+  }
+}
+
+void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
     _index = _sz;
   }
 }
@@ -135,6 +168,21 @@
   }
 }
 
+#ifndef PRODUCT
+// Helpful for debugging
+
+void ObjPtrQueue::print(const char* name) {
+  print(name, _buf, _index, _sz);
+}
+
+void ObjPtrQueue::print(const char* name,
+                        void** buf, size_t index, size_t sz) {
+  gclog_or_tty->print_cr("  SATB BUFFER [%s] buf: "PTR_FORMAT" "
+                         "index: "SIZE_FORMAT" sz: "SIZE_FORMAT,
+                         name, buf, index, sz);
+}
+#endif // PRODUCT
+
 #ifdef ASSERT
 void ObjPtrQueue::verify_oops_in_buffer() {
   if (_buf == NULL) return;
@@ -150,12 +198,9 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-
 SATBMarkQueueSet::SATBMarkQueueSet() :
-  PtrQueueSet(),
-  _closure(NULL), _par_closures(NULL),
-  _shared_satb_queue(this, true /*perm*/)
-{}
+  PtrQueueSet(), _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/) { }
 
 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                   int process_completed_threshold,
@@ -167,7 +212,6 @@
   }
 }
 
-
 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
   DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();)
   t->satb_mark_queue().handle_zero_index();
@@ -228,6 +272,13 @@
   }
 }
 
+void SATBMarkQueueSet::filter_thread_buffers() {
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().filter();
+  }
+  shared_satb_queue()->filter();
+}
+
 void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
   _closure = closure;
 }
@@ -239,9 +290,9 @@
 
 void SATBMarkQueueSet::iterate_closure_all_threads() {
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure(_closure);
+    t->satb_mark_queue().apply_closure_and_empty(_closure);
   }
-  shared_satb_queue()->apply_closure(_closure);
+  shared_satb_queue()->apply_closure_and_empty(_closure);
 }
 
 void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
@@ -250,7 +301,7 @@
 
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
     if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
     }
   }
 
@@ -264,7 +315,7 @@
 
   VMThread* vmt = VMThread::vm_thread();
   if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure(_par_closures[worker]);
+    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
   }
 }
 
@@ -292,6 +343,61 @@
   }
 }
 
+void SATBMarkQueueSet::iterate_completed_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  BufferNode* nd = _completed_buffers_head;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    nd = nd->next();
+  }
+}
+
+void SATBMarkQueueSet::iterate_thread_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(cl);
+  }
+  shared_satb_queue()->apply_closure(cl);
+}
+
+#ifndef PRODUCT
+// Helpful for debugging
+
+#define SATB_PRINTER_BUFFER_SIZE 256
+
+void SATBMarkQueueSet::print_all(const char* msg) {
+  char buffer[SATB_PRINTER_BUFFER_SIZE];
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+
+  gclog_or_tty->cr();
+  gclog_or_tty->print_cr("SATB BUFFERS [%s]", msg);
+
+  BufferNode* nd = _completed_buffers_head;
+  int i = 0;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Enqueued: %d", i);
+    ObjPtrQueue::print(buffer, buf, 0, _sz);
+    nd = nd->next();
+    i += 1;
+  }
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Thread: %s", t->name());
+    t->satb_mark_queue().print(buffer);
+  }
+
+  shared_satb_queue()->print("Shared");
+
+  gclog_or_tty->cr();
+}
+#endif // PRODUCT
+
 void SATBMarkQueueSet::abandon_partial_marking() {
   BufferNode* buffers_to_delete = NULL;
   {
@@ -316,5 +422,5 @@
   for (JavaThread* t = Threads::first(); t; t = t->next()) {
     t->satb_mark_queue().reset();
   }
-  shared_satb_queue()->reset();
+  shared_satb_queue()->reset();
 }
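
The compaction loop in ObjPtrQueue::filter() above has a shape that is easy to get wrong: the buffer is filled from the top down, so live entries occupy [index, sz) and retained entries are copied back toward sz. A standalone sketch of the same loop using element indices instead of byte offsets, with a stand-in predicate (the real filter drops NULLs, objects over TAMS, and references into the CSet):

#include <cstddef>
#include <cstdio>

// Stand-in for the real per-entry retention test.
static bool retain_entry(void* entry) {
  return entry != NULL;
}

// Sketch of ObjPtrQueue::filter()'s compaction. Returns the new index;
// everything in [new_index, sz) was retained, in its original order.
static size_t filter_buffer(void** buf, size_t index, size_t sz) {
  size_t new_index = sz;
  size_t i = sz;
  while (i > index) {
    i -= 1;
    void* entry = buf[i];
    if (retain_entry(entry)) {
      new_index -= 1;
      buf[new_index] = entry;  // compact towards the top of the buffer
    }
  }
  return new_index;
}

int main() {
  int a, b;
  void* buf[6] = { 0, 0, &a, 0, &b, 0 };
  size_t new_index = filter_buffer(buf, 0, 6);
  printf("retained %zu of 6\n", 6 - new_index);  // retained 2 of 6
  return 0;
}

should_enqueue_buffer() then divides retained entries by total entries and only enqueues the buffer when the resulting percentage exceeds G1SATBBufferEnqueueingThresholdPercent.
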
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Jan 10 20:02:41 2012 +0100
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Tue Jan 10 18:58:13 2012 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,9 +29,26 @@
 
 class ObjectClosure;
 class JavaThread;
+class SATBMarkQueueSet;
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class SATBMarkQueueSet;
+
+private:
+  // Filter out unwanted entries from the buffer.
+  void filter();
+
+  // Apply the closure to all elements.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements and empty the buffer;
+  void apply_closure_and_empty(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
 public:
   ObjPtrQueue(PtrQueueSet* qset, bool perm = false) :
     // SATB queues are only active during marking cycles. We create
     // the queue as inactive, and enable it only when marking starts;
     // the active field is set to true in JavaThread::initialize_queues().
     PtrQueue(qset, perm, false /* active */) { }
 
+  // Overrides PtrQueue::flush() so that it can filter the buffer
+  // before it is flushed.
+  virtual void flush();
+
   // Overrides PtrQueue::should_enqueue_buffer(). See the method's
   // definition for more information.
   virtual bool should_enqueue_buffer();
 
-  // Apply the closure to all elements, and reset the index to make the
-  // buffer empty.
-  void apply_closure(ObjectClosure* cl);
-
-  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
-  static void apply_closure_to_buffer(ObjectClosure* cl,
-                                      void** buf, size_t index, size_t sz);
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print(const char* name);
+  static void print(const char* name, void** buf, size_t index, size_t sz);
+#endif // PRODUCT
 
   void verify_oops_in_buffer() NOT_DEBUG_RETURN;
 };
 
-
-
 class SATBMarkQueueSet: public PtrQueueSet {
   ObjectClosure* _closure;
   ObjectClosure** _par_closures;  // One per ParGCThread.
@@ -88,6 +105,9 @@
   // set itself, has an active value same as expected_active.
   void set_active_all_threads(bool b, bool expected_active);
 
+  // Filter all the currently-active SATB buffers.
+  void filter_thread_buffers();
+
   // Register "blk" as "the closure" for all queues.  Only one such closure
   // is allowed.  The "apply_closure_to_completed_buffer" method will apply
   // this closure to a completed buffer, and "iterate_closure_all_threads"
@@ -98,10 +118,9 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // If there is a registered closure for buffers, apply it to all entries
-  // in all currently-active buffers.  This should only be applied at a
-  // safepoint.  (Currently must not be called in parallel; this should
-  // change in the future.)
+  // Apply the registered closure to all entries on each
+  // currently-active buffer and then empty the buffer. It should only
+  // be called serially and at a safepoint.
   void iterate_closure_all_threads();
   // Parallel version of the above.
   void par_iterate_closure_all_threads(int worker);
@@ -117,11 +136,21 @@
     return apply_closure_to_completed_buffer_work(true, worker);
   }
 
+  // Apply the given closure to the enqueued and currently-active
+  // buffers, respectively. Both methods are read-only, i.e., they
+  // do not modify any of the buffers.
+  void iterate_completed_buffers_read_only(ObjectClosure* cl);
+  void iterate_thread_buffers_read_only(ObjectClosure* cl);
+
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print_all(const char* msg);
+#endif // PRODUCT
+
   ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
 
   // If a marking is being abandoned, reset any unprocessed log buffers.
   void abandon_partial_marking();
-
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
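
The read-only/consuming split declared above (apply_closure() vs. apply_closure_and_empty(), plus the *_read_only iterators on the queue set) comes down to whether _index is advanced past the entries. A minimal sketch of the distinction, with toy types in place of ObjPtrQueue and ObjectClosure:

#include <cstddef>

struct ToyQueue {
  void** buf;
  size_t index, sz;  // live entries occupy [index, sz)

  template <typename Closure>
  void apply_closure(Closure& cl) {  // read-only: entries survive
    for (size_t i = index; i < sz; i++) cl(buf[i]);
  }

  template <typename Closure>
  void apply_closure_and_empty(Closure& cl) {  // consuming
    apply_closure(cl);
    index = sz;  // mark every entry as consumed
  }
};

struct CountClosure {
  size_t n;
  void operator()(void*) { n++; }
};

int main() {
  void* entries[4] = { 0, 0, 0, 0 };
  ToyQueue q = { entries, 1, 4 };
  CountClosure c = { 0 };
  q.apply_closure(c);            // c.n == 3, q.index still 1
  q.apply_closure_and_empty(c);  // c.n == 6, q.index now 4
  return (q.index == 4 && c.n == 6) ? 0 : 1;
}

This is why the verification paths use the *_read_only iterators: they can inspect SATB entries at a safepoint without consuming them.
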