# HG changeset patch
# User ysr
# Date 1212706676 25200
# Node ID 37f87013dfd831979f18702ebddc42cceeb2b445
# Parent 0b27f3512f9eaba6da4e866b3887c38850408055
6711316: Open source the Garbage-First garbage collector
Summary: First mercurial integration of the code for the Garbage-First garbage collector.
Reviewed-by: apetrusenko, iveresov, jmasa, sgoldman, tonyp, ysr

diff -r 0b27f3512f9e -r 37f87013dfd8 make/linux/makefiles/top.make
--- a/make/linux/makefiles/top.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/linux/makefiles/top.make Thu Jun 05 15:57:56 2008 -0700
@@ -64,6 +64,7 @@
   $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
   $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
   $(VM)/gc_implementation/includeDB_gc_parNew \
+  $(VM)/gc_implementation/includeDB_gc_g1 \
   $(VM)/gc_implementation/includeDB_gc_serial \
   $(VM)/gc_implementation/includeDB_gc_shared

diff -r 0b27f3512f9e -r 37f87013dfd8 make/solaris/makefiles/top.make
--- a/make/solaris/makefiles/top.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/solaris/makefiles/top.make Thu Jun 05 15:57:56 2008 -0700
@@ -54,6 +54,7 @@
   $(VM)/gc_implementation/includeDB_gc_parallelScavenge \
   $(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
   $(VM)/gc_implementation/includeDB_gc_parNew \
+  $(VM)/gc_implementation/includeDB_gc_g1 \
   $(VM)/gc_implementation/includeDB_gc_serial \
   $(VM)/gc_implementation/includeDB_gc_shared

diff -r 0b27f3512f9e -r 37f87013dfd8 make/windows/makefiles/generated.make
--- a/make/windows/makefiles/generated.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/windows/makefiles/generated.make Thu Jun 05 15:57:56 2008 -0700
@@ -50,7 +50,8 @@
   $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \
   $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \
   $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \
-  $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
+  $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \
+  $(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1

 IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \
   $(WorkSpace)/src/share/vm/includeDB_features

diff -r 0b27f3512f9e -r 37f87013dfd8 make/windows/makefiles/makedeps.make
--- a/make/windows/makefiles/makedeps.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/windows/makefiles/makedeps.make Thu Jun 05 15:57:56 2008 -0700
@@ -64,6 +64,7 @@
   -relativeInclude src\share\vm\gc_implementation\shared \
   -relativeInclude src\share\vm\gc_implementation\parNew \
   -relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \
+  -relativeInclude src\share\vm\gc_implementation\g1 \
   -relativeInclude src\share\vm\gc_interface \
   -relativeInclude src\share\vm\asm \
   -relativeInclude src\share\vm\memory \
@@ -115,6 +116,7 @@
   -additionalFile includeDB_gc_parallel \
   -additionalFile includeDB_gc_parallelScavenge \
   -additionalFile includeDB_gc_concurrentMarkSweep \
+  -additionalFile includeDB_gc_g1 \
   -additionalFile includeDB_gc_parNew \
   -additionalFile includeDB_gc_shared \
   -additionalFile includeDB_gc_serial \

diff -r 0b27f3512f9e -r 37f87013dfd8 make/windows/makefiles/vm.make
--- a/make/windows/makefiles/vm.make Wed Jun 04 13:51:09 2008 -0700
+++ b/make/windows/makefiles/vm.make Thu Jun 05 15:57:56 2008 -0700
@@ -110,6 +110,7 @@
   /I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\
   /I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\
+  /I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\
  /I
"$(WorkSpace)\src\share\vm\gc_interface"\ /I "$(WorkSpace)\src\share\vm\asm" \ /I "$(WorkSpace)\src\share\vm\memory" \ @@ -139,6 +140,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep +VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory @@ -215,6 +217,9 @@ {$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< +{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj:: + $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< + {$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj:: $(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $< diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -130,6 +130,20 @@ return 0x00; // illegal instruction 0x00000000 } +Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) { + switch (in) { + case rc_z: return equal; + case rc_lez: return lessEqual; + case rc_lz: return less; + case rc_nz: return notEqual; + case rc_gz: return greater; + case rc_gez: return greaterEqual; + default: + ShouldNotReachHere(); + } + return equal; +} + // Generate a bunch 'o stuff (including v9's #ifndef PRODUCT void Assembler::test_v9() { @@ -1213,31 +1227,19 @@ } -void MacroAssembler::store_check(Register tmp, Register obj) { - // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.) - - /* $$$ This stuff needs to go into one of the BarrierSet generator - functions. (The particular barrier sets will have to be friends of - MacroAssembler, I guess.) */ - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); +void MacroAssembler::card_table_write(jbyte* byte_map_base, + Register tmp, Register obj) { #ifdef _LP64 srlx(obj, CardTableModRefBS::card_shift, obj); #else srl(obj, CardTableModRefBS::card_shift, obj); #endif assert( tmp != obj, "need separate temp reg"); - Address rs(tmp, (address)ct->byte_map_base); + Address rs(tmp, (address)byte_map_base); load_address(rs); stb(G0, rs.base(), obj); } -void MacroAssembler::store_check(Register tmp, Register obj, Register offset) { - store_check(tmp, obj); -} - // %%% Note: The following six instructions have been moved, // unchanged, from assembler_sparc.inline.hpp. // They will be refactored at a later date. 
@@ -1648,11 +1650,21 @@ if (reg == G0) return; // always NULL, which is always an oop - char buffer[16]; + char buffer[64]; +#ifdef COMPILER1 + if (CommentedAssembly) { + snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); + block_comment(buffer); + } +#endif + + int len = strlen(file) + strlen(msg) + 1 + 4; sprintf(buffer, "%d", line); - int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); + len += strlen(buffer); + sprintf(buffer, " at offset %d ", offset()); + len += strlen(buffer); char * real_msg = new char[len]; - sprintf(real_msg, "%s (%s:%d)", msg, file, line); + sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line); // Call indirectly to solve generation ordering problem Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address()); @@ -2044,6 +2056,27 @@ #endif } +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, address d, + relocInfo::relocType rt ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, d, rt); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, d, rt); + } +} + +void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, + Register s1, Label& L ) { + if (VM_Version::v9_instructions_work()) { + bpr(rc, a, p, s1, L); + } else { + tst(s1); + br(reg_cond_to_cc_cond(rc), a, p, L); + } +} + // instruction sequences factored across compiler & interpreter @@ -3226,68 +3259,74 @@ assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); - // get eden boundaries - // note: we need both top & top_addr! - const Register top_addr = t1; - const Register end = t2; - - CollectedHeap* ch = Universe::heap(); - set((intx)ch->top_addr(), top_addr); - intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); - ld_ptr(top_addr, delta, end); - ld_ptr(top_addr, 0, obj); - - // try to allocate - Label retry; - bind(retry); -#ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - btst(MinObjAlignmentInBytesMask, obj); - br(Assembler::zero, false, Assembler::pt, L); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + br(Assembler::always, false, Assembler::pt, slow_case); delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); - } + } else { + // get eden boundaries + // note: we need both top & top_addr! 
+ const Register top_addr = t1; + const Register end = t2; + + CollectedHeap* ch = Universe::heap(); + set((intx)ch->top_addr(), top_addr); + intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); + ld_ptr(top_addr, delta, end); + ld_ptr(top_addr, 0, obj); + + // try to allocate + Label retry; + bind(retry); +#ifdef ASSERT + // make sure eden top is properly aligned + { + Label L; + btst(MinObjAlignmentInBytesMask, obj); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } #endif // ASSERT - const Register free = end; - sub(end, obj, free); // compute amount of free space - if (var_size_in_bytes->is_valid()) { - // size is unknown at compile time - cmp(free, var_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, var_size_in_bytes, end); - } else { - // size is known at compile time - cmp(free, con_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, con_size_in_bytes, end); - } - // Compare obj with the value at top_addr; if still equal, swap the value of - // end with the value at top_addr. If not equal, read the value at top_addr - // into end. - casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - // if someone beat us on the allocation, try again, otherwise continue - cmp(obj, end); - brx(Assembler::notEqual, false, Assembler::pn, retry); - delayed()->mov(end, obj); // nop if successfull since obj == end + const Register free = end; + sub(end, obj, free); // compute amount of free space + if (var_size_in_bytes->is_valid()) { + // size is unknown at compile time + cmp(free, var_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, var_size_in_bytes, end); + } else { + // size is known at compile time + cmp(free, con_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, con_size_in_bytes, end); + } + // Compare obj with the value at top_addr; if still equal, swap the value of + // end with the value at top_addr. If not equal, read the value at top_addr + // into end. 
+ casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + // if someone beat us on the allocation, try again, otherwise continue + cmp(obj, end); + brx(Assembler::notEqual, false, Assembler::pn, retry); + delayed()->mov(end, obj); // nop if successfull since obj == end #ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - const Register top_addr = t1; - - set((intx)ch->top_addr(), top_addr); - ld_ptr(top_addr, 0, top_addr); - btst(MinObjAlignmentInBytesMask, top_addr); - br(Assembler::zero, false, Assembler::pt, L); - delayed()->nop(); - stop("eden top is not properly aligned"); - bind(L); + // make sure eden top is properly aligned + { + Label L; + const Register top_addr = t1; + + set((intx)ch->top_addr(), top_addr); + ld_ptr(top_addr, 0, top_addr); + btst(MinObjAlignmentInBytesMask, top_addr); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + stop("eden top is not properly aligned"); + bind(L); + } +#endif // ASSERT } -#endif // ASSERT } @@ -3537,6 +3576,468 @@ } } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +static uint num_stores = 0; +static uint num_null_pre_stores = 0; + +static void count_null_pre_vals(void* pre_val) { + num_stores++; + if (pre_val == NULL) num_null_pre_stores++; + if ((num_stores % 1000000) == 0) { + tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.", + num_stores, num_null_pre_stores, + 100.0*(float)num_null_pre_stores/(float)num_stores); + } +} + +static address satb_log_enqueue_with_frame = 0; +static u_char* satb_log_enqueue_with_frame_end = 0; + +static address satb_log_enqueue_frameless = 0; +static u_char* satb_log_enqueue_frameless_end = 0; + +static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? + +// The calls to this don't work. We'd need to do a fair amount of work to +// make it work. +static void check_index(int ind) { + assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), + "Invariants.") +} + +static void generate_satb_log_enqueue(bool with_frame) { + BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + Register pre_val; + + Label refill, restart; + if (with_frame) { + masm.save_frame(0); + pre_val = I0; // Was O0 before the save. + } else { + pre_val = O0; + } + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && + in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), + "check sizes in assembly below"); + + masm.bind(restart); + masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + if (!with_frame) { + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } else { + // Not delayed. 
+ masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } + if (with_frame) { + masm.ret(); + masm.delayed()->restore(); + } + masm.bind(refill); + + address handle_zero = + CAST_FROM_FN_PTR(address, + &SATBMarkQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + masm.mov(G1_scratch, L0); + masm.mov(G3_scratch, L1); + masm.mov(G4, L2); + // We need the value of O0 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O0, L3); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + masm.call_VM_leaf(L5, handle_zero, G2_thread); + masm.mov(L0, G1_scratch); + masm.mov(L1, G3_scratch); + masm.mov(L2, G4); + masm.mov(L3, O0); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + if (with_frame) { + satb_log_enqueue_with_frame = start; + satb_log_enqueue_with_frame_end = masm.pc(); + } else { + satb_log_enqueue_frameless = start; + satb_log_enqueue_frameless_end = masm.pc(); + } +} + +static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { + if (with_frame) { + if (satb_log_enqueue_with_frame == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_with_frame != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated with-frame satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_with_frame, + satb_log_enqueue_with_frame_end, + tty); + } + } + } else { + if (satb_log_enqueue_frameless == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_frameless != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated frameless satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_frameless, + satb_log_enqueue_frameless_end, + tty); + } + } + } +} + +void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) { + assert(offset == 0 || index == noreg, "choose one"); + + if (G1DisablePreBarrier) return; + // satb_log_barrier(tmp, obj, offset, preserve_o_regs); + Label filtered; + // satb_log_barrier_work0(tmp, filtered); + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + ldsb(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed() -> nop(); + + // satb_log_barrier_work1(tmp, offset); + if (index == noreg) { + if (Assembler::is_simm13(offset)) { + ld_ptr(obj, offset, tmp); + } else { + set(offset, tmp); + ld_ptr(obj, tmp, tmp); + } + } else { + ld_ptr(obj, index, tmp); + } + + // satb_log_barrier_work2(obj, tmp, offset); + + // satb_log_barrier_work3(tmp, filtered, preserve_o_regs); + + const Register pre_val = tmp; + + if (G1SATBBarrierPrintNullPreVals) { + save_frame(0); + mov(pre_val, O0); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_null_pre_vals)); + delayed()->nop(); + // Restore G-regs that target may have used. 
+ mov(L1, G1); + mov(L2, G2); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); + delayed() -> nop(); + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there's some cases in which + // it's an O-reg. In the first case, do a normal call. In the latter, + // do a save here and call the frameless version. + + guarantee(pre_val->is_global() || pre_val->is_out(), + "Or we need to think harder."); + if (pre_val->is_global() && !preserve_o_regs) { + generate_satb_log_enqueue_if_necessary(true); // with frame. + call(satb_log_enqueue_with_frame); + delayed()->mov(pre_val, O0); + } else { + generate_satb_log_enqueue_if_necessary(false); // with frameless. + save_frame(0); + call(satb_log_enqueue_frameless); + delayed()->mov(pre_val->after_save(), O0); + restore(); + } + + bind(filtered); +} + +static jint num_ct_writes = 0; +static jint num_ct_writes_filtered_in_hr = 0; +static jint num_ct_writes_filtered_null = 0; +static jint num_ct_writes_filtered_pop = 0; +static G1CollectedHeap* g1 = NULL; + +static Thread* count_ct_writes(void* filter_val, void* new_val) { + Atomic::inc(&num_ct_writes); + if (filter_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_in_hr); + } else if (new_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_null); + } else { + if (g1 == NULL) { + g1 = G1CollectedHeap::heap(); + } + if ((HeapWord*)new_val < g1->popular_object_boundary()) { + Atomic::inc(&num_ct_writes_filtered_pop); + } + } + if ((num_ct_writes % 1000000) == 0) { + jint num_ct_writes_filtered = + num_ct_writes_filtered_in_hr + + num_ct_writes_filtered_null + + num_ct_writes_filtered_pop; + + tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" + " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).", + num_ct_writes, + 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_in_hr/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_null/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_pop/ + (float)num_ct_writes); + } + return Thread::current(); +} + +static address dirty_card_log_enqueue = 0; +static u_char* dirty_card_log_enqueue_end = 0; + +// This gets to assume that o0 contains the object address. +static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { + BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + masm.srlx(O0, CardTableModRefBS::card_shift, O0); +#else + masm.srl(O0, CardTableModRefBS::card_shift, O0); +#endif + Address rs(O1, (address)byte_map_base); + masm.load_address(rs); // O1 := + masm.ldub(O0, O1, O2); // O2 := [O0 + O1] + + masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + O2, not_already_dirty); + // Get O1 + O2 into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + masm.delayed()->add(O0, O1, O3); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + masm.retl(); + masm.delayed()->nop(); + + // Not dirty. + masm.bind(not_already_dirty); + // First, dirty it. + masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). 
+ int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + masm.bind(restart); + masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(O3, L1, L0); // [_buf + index] := I0 + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + masm.bind(refill); + address handle_zero = + CAST_FROM_FN_PTR(address, + &DirtyCardQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + masm.mov(G1_scratch, L3); + masm.mov(G3_scratch, L5); + // We need the value of O3 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O3, L6); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + + masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + masm.mov(L3, G1_scratch); + masm.mov(L5, G3_scratch); + masm.mov(L6, O3); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + dirty_card_log_enqueue = start; + dirty_card_log_enqueue_end = masm.pc(); + // XXX Should have a guarantee here about not going off the end! + // Does it already do so? Do an experiment... +} + +static inline void +generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { + if (dirty_card_log_enqueue == 0) { + generate_dirty_card_log_enqueue(byte_map_base); + assert(dirty_card_log_enqueue != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated dirty_card enqueue:"); + Disassembler::decode((u_char*)dirty_card_log_enqueue, + dirty_card_log_enqueue_end, + tty); + } + } +} + + +void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + + Label filtered; + MacroAssembler* post_filter_masm = this; + + if (new_val == G0) return; + if (G1DisablePostBarrier) return; + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCT || + bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + if (G1RSBarrierRegionFilter) { + xor3(store_addr, new_val, tmp); +#ifdef _LP64 + srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#else + srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#endif + if (G1PrintCTFilterStats) { + guarantee(tmp->is_global(), "Or stats won't work..."); + // This is a sleazy hack: I'm temporarily hijacking G2, which I + // promise to restore. + mov(new_val, G2); + save_frame(0); + mov(tmp, O0); + mov(G2, O1); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_ct_writes)); + delayed()->nop(); + mov(O0, G2); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + // XXX Should I predict this taken or not? Does it mattern? 
+ br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed()->nop(); + } + + // Now we decide how to generate the card table write. If we're + // enqueueing, we call out to a generated function. Otherwise, we do it + // inline here. + + if (G1RSBarrierUseQueue) { + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! + if (use_scr) { + post_filter_masm->mov(store_addr, scr); + } else { + post_filter_masm->nop(); + } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); + + } else { + +#ifdef _LP64 + post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); +#else + post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); +#endif + assert( tmp != store_addr, "need separate temp reg"); + Address rs(tmp, (address)bs->byte_map_base); + load_address(rs); + stb(G0, rs.base(), store_addr); + } + + bind(filtered); + +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + +void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + // If we're writing constant NULL, we can skip the write barrier. + if (new_val == G0) return; + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); + card_table_write(bs->byte_map_base, tmp, store_addr); +} + void MacroAssembler::load_klass(Register s, Register d) { // The number of bytes in this code is used by // MachCallDynamicJavaNode::ret_addr_offset() diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -1439,7 +1439,11 @@ // pp 214 void save( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); } - void save( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } + void save( Register s1, int simm13a, Register d ) { + // make sure frame is at least large enough for the register save area + assert(-simm13a >= 16 * wordSize, "frame too small"); + emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); + } void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); } void restore( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } @@ -1594,6 +1598,11 @@ inline void wrasi( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); } inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); } + // For a given register condition, return the appropriate condition code + // Condition 
(the one you would use to get the same effect after "tst" on + // the target register.) + Assembler::Condition reg_cond_to_cc_cond(RCondition in); + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { @@ -1630,6 +1639,8 @@ // restore global registers in case C code disturbed them static void restore_registers(MacroAssembler* a, Register r); + + }; @@ -1722,6 +1733,12 @@ void br_null ( Register s1, bool a, Predict p, Label& L ); void br_notnull( Register s1, bool a, Predict p, Label& L ); + // These versions will do the most efficient thing on v8 and v9. Perhaps + // this is what the routine above was meant to do, but it didn't (and + // didn't cover both target address kinds.) + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none ); + void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L); + inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); inline void bp( Condition c, bool a, CC cc, Predict p, Label& L ); @@ -2055,9 +2072,23 @@ #endif // ASSERT public: - // Stores - void store_check(Register tmp, Register obj); // store check for obj - register is destroyed afterwards - void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards + + // Write to card table for - register is destroyed afterwards. + void card_table_write(jbyte* byte_map_base, Register tmp, Register obj); + + void card_write_barrier_post(Register store_addr, Register new_val, Register tmp); + +#ifndef SERIALGC + // Array store and offset + void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs); + + void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp); + + // May do filtering, depending on the boolean arguments. 
+ void g1_card_table_write(jbyte* byte_map_base, + Register tmp, Register obj, Register new_val, + bool region_filter, bool null_filter); +#endif // SERIALGC // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack void push_fTOS(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -404,4 +404,55 @@ } +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + pre_val_reg, _continuation); + __ delayed()->nop(); + + __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id)); + __ delayed()->mov(pre_val_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register addr_reg = addr()->as_pointer_register(); + Register new_val_reg = new_val()->as_register(); + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt, + new_val_reg, _continuation); + __ delayed()->nop(); + + __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id)); + __ delayed()->mov(addr_reg, G4); + __ br(Assembler::always, false, Assembler::pt, _continuation); + __ delayed()->nop(); +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + #undef __ diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -2093,7 +2093,11 @@ // the known type isn't loaded since the code sanity checks // in debug mode and the type isn't required when we know the exact type // also check that the type is an array type. - if (op->expected_type() == NULL) { + // We also, for now, always call the stub if the barrier set requires a + // write_ref_pre barrier (which the stub does, but none of the optimized + // cases currently does). + if (op->expected_type() == NULL || + Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) { __ mov(src, O0); __ mov(src_pos, O1); __ mov(dst, O2); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -365,6 +365,10 @@ __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info); } + if (obj_store) { + // Needs GC write barriers. 
+ pre_barrier(LIR_OprFact::address(array_addr), false, NULL); + } __ move(value.result(), array_addr, null_check_info); if (obj_store) { // Is this precise? @@ -663,6 +667,10 @@ __ add(obj.result(), offset.result(), addr); + if (type == objectType) { // Write-barrier needed for Object fields. + pre_barrier(obj.result(), false, NULL); + } + if (type == objectType) __ cas_obj(addr, cmp.result(), val.result(), t1, t2); else if (type == intType) @@ -677,7 +685,11 @@ LIR_Opr result = rlock_result(x); __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result); if (type == objectType) { // Write-barrier needed for Object fields. +#ifdef PRECISE_CARDMARK + post_barrier(addr, val.result()); +#else post_barrier(obj.result(), val.result()); +#endif // PRECISE_CARDMARK } } @@ -1153,6 +1165,10 @@ addr = new LIR_Address(base_op, index_op, type); } + if (is_obj) { + pre_barrier(LIR_OprFact::address(addr), false, NULL); + // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr)); + } __ move(data, addr); if (is_obj) { // This address is precise diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/c1_Runtime1_sparc.cpp --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -832,6 +832,163 @@ } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { // G4: previous value of memory + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments); + + Register pre_val = G4; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + + Label refill, restart; + bool with_frame = false; // I don't know if we can do with-frame. + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, + Assembler::pn, tmp, refill); + + // If the branch is taken, no harm in executing this in the delay slot. 
+ __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2); + __ sub(tmp, oopSize, tmp); + + __ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(pre_val, L0); + __ mov(tmp, L1); + __ mov(tmp2, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + SATBMarkQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, pre_val); + __ mov(L1, tmp); + __ mov(L2, tmp2); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; + + case g1_post_barrier_slow_id: + { + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ save_frame(0); + __ set((int)id, O1); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0); + __ should_not_reach_here(); + break; + } + + __ set_info("g1_post_barrier_slow_id", dont_gc_arguments); + + Register addr = G4; + Register cardtable = G5; + Register tmp = G1_scratch; + Register tmp2 = G3_scratch; + jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base; + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + __ srlx(addr, CardTableModRefBS::card_shift, addr); +#else + __ srl(addr, CardTableModRefBS::card_shift, addr); +#endif + + Address rs(cardtable, (address)byte_map_base); + __ load_address(rs); // cardtable := + __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable] + + __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + tmp, not_already_dirty); + // Get cardtable + tmp into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + __ delayed()->add(addr, cardtable, tmp2); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + __ retl(); + __ delayed()->nop(); + + // Not dirty. + __ bind(not_already_dirty); + // First, dirty it. + __ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty). + + Register tmp3 = cardtable; + Register tmp4 = tmp; + + // these registers are now dead + addr = cardtable = tmp = noreg; + + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3); + + __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + tmp3, refill); + // If the branch is taken, no harm in executing this in the delay slot. 
+ __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4); + __ sub(tmp3, oopSize, tmp3); + + __ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); + __ save_frame(0); + + __ mov(tmp2, L0); + __ mov(tmp3, L1); + __ mov(tmp4, L2); + + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, + DirtyCardQueueSet::handle_zero_index_for_thread), + G2_thread); + + __ mov(L0, tmp2); + __ mov(L1, tmp3); + __ mov(L2, tmp4); + + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->restore(); + } + break; +#endif // !SERIALGC + default: { __ set_info("unimplemented entry", dont_gc_arguments); __ save_frame(0); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -699,17 +699,16 @@ // Stores long into offset pointed to by base void AdapterGenerator::store_c2i_long(Register r, Register base, const int st_off, bool is_stack) { -#ifdef COMPILER2 #ifdef _LP64 // In V9, longs are given 2 64-bit slots in the interpreter, but the // data is passed in only 1 slot. __ stx(r, base, next_arg_slot(st_off)); #else +#ifdef COMPILER2 // Misaligned store of 64-bit data __ stw(r, base, arg_slot(st_off)); // lo bits __ srlx(r, 32, r); __ stw(r, base, next_arg_slot(st_off)); // hi bits -#endif // _LP64 #else if (is_stack) { // Misaligned store of 64-bit data @@ -721,6 +720,7 @@ __ stw(r , base, next_arg_slot(st_off)); // hi bits } #endif // COMPILER2 +#endif // _LP64 tag_c2i_arg(frame::TagCategory2, base, st_off, r); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1110,30 +1110,31 @@ // The input registers are overwritten. // void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); if (bs->has_write_ref_pre_barrier()) { assert(bs->has_write_ref_array_pre_opt(), "Else unsupported barrier set."); - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); - // Get some new fresh output registers. __ save_frame(0); // Save the necessary global regs... will be used after. - __ mov(addr, L0); - __ mov(count, L1); - - __ mov(addr, O0); + if (addr->is_global()) { + __ mov(addr, L0); + } + if (count->is_global()) { + __ mov(count, L1); + } + __ mov(addr->after_save(), O0); // Get the count into O1 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ delayed()->mov(count, O1); - __ mov(L0, addr); - __ mov(L1, count); + __ delayed()->mov(count->after_save(), O1); + if (addr->is_global()) { + __ mov(L0, addr); + } + if (count->is_global()) { + __ mov(L1, count); + } __ restore(); } -#endif // 0 } // // Generate post-write barrier for array. @@ -1150,22 +1151,17 @@ BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { - assert(addr->is_global() && count->is_global(), - "If not, then we have to fix this code to handle more " - "general cases."); // Get some new fresh output registers. 
__ save_frame(0); - __ mov(addr, O0); + __ mov(addr->after_save(), O0); __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ delayed()->mov(count, O1); + __ delayed()->mov(count->after_save(), O1); __ restore(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -2412,8 +2408,7 @@ StubCodeMark mark(this, "StubRoutines", name); address start = __ pc(); - gen_write_ref_array_pre_barrier(G1, G5); - + gen_write_ref_array_pre_barrier(O1, O2); #ifdef ASSERT // We sometimes save a frame (see partial_subtype_check below). diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/sparc/vm/templateTable_sparc.cpp --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -28,6 +28,79 @@ #ifndef CC_INTERP #define __ _masm-> +// Misc helpers + +// Do an oop store like *(base + index + offset) = val +// index can be noreg, +static void do_oop_store(InterpreterMacroAssembler* _masm, + Register base, + Register index, + int offset, + Register val, + Register tmp, + BarrierSet::Name barrier, + bool precise) { + assert(tmp != val && tmp != base && tmp != index, "register collision"); + assert(index == noreg || offset == 0, "only one offset"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true); + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ g1_write_barrier_post(base, val, tmp); + } + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (index == noreg ) { + assert(Assembler::is_simm13(offset), "fix this code"); + __ store_heap_oop(val, base, offset); + } else { + __ store_heap_oop(val, base, index); + } + // No need for post barrier if storing NULL + if (val != G0) { + if (precise) { + if (index == noreg) { + __ add(base, offset, base); + } else { + __ add(base, index, base); + } + } + __ card_write_barrier_post(base, val, tmp); + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + ShouldNotReachHere(); + break; + default : + ShouldNotReachHere(); + + } +} + //---------------------------------------------------------------------------------------------------- // Platform-dependent initialization @@ -758,6 +831,8 @@ // O4: array element klass // O5: value klass + // Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + // Generate a fast subtype check. Branch to store_ok if no // failure. Throw if failure. __ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok ); @@ -767,18 +842,14 @@ // Store is OK. __ bind(store_ok); - __ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - // Quote from rememberedSet.hpp: For objArrays, the precise card - // corresponding to the pointer store is dirtied so we don't need to - // scavenge the entire array. 
- Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ add(element, O1); // address the element precisely - __ store_check(G3_scratch, O1); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true); + __ ba(false,done); __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(is_null); - __ store_heap_oop(Otos_i, element); + do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true); + __ profile_null_seen(G3_scratch); __ inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value) __ bind(done); @@ -2449,8 +2520,9 @@ // atos __ pop_ptr(); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); + __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2491,8 +2563,9 @@ __ pop_ptr(); pop_and_check_object(Rclass); __ verify_oop(Otos_i); - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); + patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch); __ ba(false, checkVolatile); __ delayed()->tst(Lscratch); @@ -2646,8 +2719,7 @@ __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset); break; case Bytecodes::_fast_aputfield: - __ store_heap_oop(Otos_i, Rclass, Roffset); - __ store_check(G1_scratch, Rclass, Roffset); + do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false); break; default: ShouldNotReachHere(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_32.cpp --- a/src/cpu/x86/vm/assembler_x86_32.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_32.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -3372,13 +3372,142 @@ call_VM_leaf(entry_point, 3); } - // Calls to C land // // When entering C land, the rbp, & rsp of the last Java frame have to be recorded // in the (thread-local) JavaThread object. When leaving C land, the last Java fp // has to be reset to 0. This is required to allow proper stack traversal. +////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register thread, + Register tmp, + Register tmp2, + bool tosca_live) { + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // if (!marking_in_progress) goto done; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + cmpl(in_progress, 0); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + cmpb(in_progress, 0); + } + jcc(Assembler::equal, done); + + // if (x.f == NULL) goto done; + cmpl(Address(obj, 0), NULL_WORD); + jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? 
+ + movl(tmp2, Address(obj, 0)); + cmpl(index, 0); + jcc(Assembler::equal, runtime); + subl(index, wordSize); + movl(tmp, buffer); + addl(tmp, index); + movl(Address(tmp, 0), tmp2); + jmp(done); + bind(runtime); + // save the live input values + if(tosca_live) pushl(rax); + pushl(obj); + pushl(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread); + popl(thread); + popl(obj); + if(tosca_live) popl(rax); + bind(done); + +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + movl(tmp, store_addr); // ebx = edx + xorl(tmp, new_val); // ebx ^= eax + shrl(tmp, HeapRegion::LogOfHRGrainBytes); // ebx <<= 9 + jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + cmpl(new_val, NULL_WORD); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? + + const Register card_index = tmp; + + movl(card_index, store_addr); // ebx = edx + shrl(card_index, CardTableModRefBS::card_shift); // ebx >>= 9 + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + ExternalAddress cardtable((address)ct->byte_map_base); + Address index(noreg, card_index, Address::times_1); + const Register card_addr = tmp; + leal(card_addr, as_Address(ArrayAddress(cardtable, index))); + cmpb(Address(card_addr, 0), 0); + jcc(Assembler::equal, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + movb(Address(card_addr, 0), 0); + + cmpl(queue_index, 0); + jcc(Assembler::equal, runtime); + subl(queue_index, wordSize); + movl(tmp2, buffer); + addl(tmp2, queue_index); + movl(Address(tmp2, 0), card_index); + jmp(done); + + bind(runtime); + // save the live input values + pushl(store_addr); + pushl(new_val); + pushl(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + popl(thread); + popl(new_val); + popl(store_addr); + + bind(done); + + +} + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////// + + void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards. 
@@ -4548,29 +4677,33 @@ Register t1, Label& slow_case) { assert(obj == rax, "obj must be in rax, for cmpxchg"); assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t1; - Label retry; - bind(retry); - ExternalAddress heap_top((address) Universe::heap()->top_addr()); - movptr(obj, heap_top); - if (var_size_in_bytes == noreg) { - leal(end, Address(obj, con_size_in_bytes)); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + jmp(slow_case); } else { - leal(end, Address(obj, var_size_in_bytes, Address::times_1)); + Register end = t1; + Label retry; + bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + movptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + leal(end, Address(obj, con_size_in_bytes)); + } else { + leal(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + cmpl(end, obj); + jcc(Assembler::below, slow_case); + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. Use lock prefix for atomicity on MPs. + if (os::is_MP()) { + lock(); + } + cmpxchgptr(end, heap_top); + jcc(Assembler::notEqual, retry); } - // if end < obj then we wrapped around => object too long => slow case - cmpl(end, obj); - jcc(Assembler::below, slow_case); - cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); - jcc(Assembler::above, slow_case); - // Compare obj with the top addr, and if still equal, store the new top addr in - // end at the address of the top addr pointer. Sets ZF if was equal, and clears - // it otherwise. Use lock prefix for atomicity on MPs. - if (os::is_MP()) { - lock(); - } - cmpxchgptr(end, heap_top); - jcc(Assembler::notEqual, retry); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_32.hpp --- a/src/cpu/x86/vm/assembler_x86_32.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_32.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -216,9 +216,11 @@ #endif // ASSERT // accessors - bool uses(Register reg) const { - return _base == reg || _index == reg; - } + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } // Convert the raw encoding form into the form expected by the constructor for // Address. An index of 4 (rsp) corresponds to having no index, so convert @@ -990,7 +992,8 @@ // on arguments should also go in here. class MacroAssembler: public Assembler { - friend class LIR_Assembler; + friend class LIR_Assembler; + friend class Runtime1; // as_Address() protected: Address as_Address(AddressLiteral adr); @@ -1151,6 +1154,10 @@ void store_check(Register obj); // store check for obj - register is destroyed afterwards void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. 
is destroyed) + void g1_write_barrier_pre(Register obj, Register thread, Register tmp, Register tmp2, bool tosca_live ); + void g1_write_barrier_post(Register store_addr, Register new_val, Register thread, Register tmp, Register tmp2); + + // split store_check(Register obj) to enhance instruction interleaving void store_check_part_1(Register obj); void store_check_part_2(Register obj); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_64.cpp --- a/src/cpu/x86/vm/assembler_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -4405,6 +4405,129 @@ call_VM_leaf(entry_point, 3); } +///////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void MacroAssembler::g1_write_barrier_pre(Register obj, Register tmp, Register tmp2, bool tosca_live ) { + Address in_progress(r15_thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address index(r15_thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(r15_thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // if (!marking_in_progress) goto done; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + cmpl(in_progress, 0); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + cmpb(in_progress, 0); + } + jcc(Assembler::equal, done); + + // if (x.f == NULL) goto done; + cmpq(Address(obj, 0), (int)NULL_WORD); + jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? + + movslq(tmp, index); + movq(tmp2, Address(obj, 0)); + cmpq(tmp, 0); + jcc(Assembler::equal, runtime); + subq(tmp, wordSize); + movl(index, tmp); + addq(tmp, buffer); + movq(Address(tmp, 0), tmp2); + jmp(done); + bind(runtime); + // save live inputs + if (tosca_live) pushq(rax); + pushq(obj); + movq(c_rarg0, Address(obj, 0)); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread); + popq(obj); + if (tosca_live) popq(rax); + bind(done); +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register tmp, + Register tmp2) { + + Address index(r15_thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(r15_thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + movq(tmp, store_addr); + xorq(tmp, new_val); + shrq(tmp, HeapRegion::LogOfHRGrainBytes); + jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + cmpq(new_val, (int)NULL_WORD); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + + movq(card_addr, store_addr); + shrq(card_addr, CardTableModRefBS::card_shift); + + ExternalAddress cardtable((address) ct->byte_map_base); + lea(tmp2, cardtable); + + // get the address of the card + addq(card_addr, tmp2); + + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + cmpb(Address(card_addr, 0), 0); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
+ + movb(Address(card_addr, 0), 0); + + cmpl(index, 0); + jcc(Assembler::equal, runtime); + subl(index, wordSize); + movq(tmp2, buffer); + movslq(rscratch1, index); + addq(tmp2, rscratch1); + // log the card + movq(Address(tmp2, 0), card_addr); + jmp(done); + + bind(runtime); + // save live inputs + pushq(store_addr); + pushq(new_val); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); + popq(new_val); + popq(store_addr); + + bind(done); + + +} + +#endif // SERIALGC +///////////////////////////////////////////////////////////////////////////// // Calls to C land // @@ -4802,32 +4925,36 @@ Label& slow_case) { assert(obj == rax, "obj must be in rax for cmpxchg"); assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t1; - Label retry; - bind(retry); - ExternalAddress heap_top((address) Universe::heap()->top_addr()); - movptr(obj, heap_top); - if (var_size_in_bytes == noreg) { - leaq(end, Address(obj, con_size_in_bytes)); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + jmp(slow_case); } else { - leaq(end, Address(obj, var_size_in_bytes, Address::times_1)); + Register end = t1; + Label retry; + bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + movptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + leaq(end, Address(obj, con_size_in_bytes)); + } else { + leaq(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + cmpq(end, obj); + jcc(Assembler::below, slow_case); + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + + jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new + // top addr in end at the address of the top addr pointer. Sets ZF + // if was equal, and clears it otherwise. Use lock prefix for + // atomicity on MPs. + if (os::is_MP()) { + lock(); + } + cmpxchgptr(end, heap_top); + // if someone beat us on the allocation, try again, otherwise continue + jcc(Assembler::notEqual, retry); } - // if end < obj then we wrapped around => object too long => slow case - cmpq(end, obj); - jcc(Assembler::below, slow_case); - cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); - - jcc(Assembler::above, slow_case); - // Compare obj with the top addr, and if still equal, store the new - // top addr in end at the address of the top addr pointer. Sets ZF - // if was equal, and clears it otherwise. Use lock prefix for - // atomicity on MPs. - if (os::is_MP()) { - lock(); - } - cmpxchgptr(end, heap_top); - // if someone beat us on the allocation, try again, otherwise continue - jcc(Assembler::notEqual, retry); } // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/assembler_x86_64.hpp --- a/src/cpu/x86/vm/assembler_x86_64.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_64.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -222,6 +222,18 @@ static Address make_raw(int base, int index, int scale, int disp); static Address make_array(ArrayAddress); + Register base() const { + return _base; + } + + Register index() const { + return _index; + } + + int disp() const { + return _disp; + } + private: bool base_needs_rex() const { @@ -1194,6 +1206,9 @@ // location (reg. 
is // destroyed) + void g1_write_barrier_pre(Register obj, Register tmp, Register tmp2, bool tosca_live ); + void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp, Register tmp2); + // split store_check(Register obj) to enhance instruction interleaving void store_check_part_1(Register obj); void store_check_part_2(Register obj); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/c1_CodeStubs_x86.cpp --- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -455,5 +455,50 @@ __ jmp(_continuation); } +///////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + + // At this point we know that marking is in progress + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false); + + __ cmpl(pre_val_reg, NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); + __ jmp(_continuation); + +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ cmpl(new_val_reg, NULL_WORD); + __ jcc(Assembler::equal, _continuation); + ce->store_parameter(addr()->as_register(), 0); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); + __ jmp(_continuation); +} + +#endif // SERIALGC +///////////////////////////////////////////////////////////////////////////// #undef __ diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -294,6 +294,8 @@ } if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), false, NULL); __ move(value.result(), array_addr, null_check_info); // Seems to be a precise post_barrier(LIR_OprFact::address(array_addr), value.result()); @@ -745,7 +747,10 @@ __ move(obj.result(), addr); __ add(addr, offset.result(), addr); - + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, false, NULL); + } LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience if (type == objectType) @@ -1250,6 +1255,8 @@ LIR_Address* addr = new LIR_Address(src, offset, type); bool is_obj = (type == T_ARRAY || type == T_OBJECT); if (is_obj) { + // Do the pre-write barrier, if any. 
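Stepping back to the eden_allocate changes earlier in the patch (assembler_x86_32.cpp and assembler_x86_64.cpp): the inline fast path is now skipped entirely when the heap cannot support contiguous inline allocation, and otherwise keeps spinning on a compare-and-swap of the shared heap top. A rough C++ sketch of that bump-the-pointer protocol, with std::atomic standing in for the hand-emitted lock cmpxchg and illustrative globals for top/end:

#include <atomic>
#include <cstddef>
#include <cstdint>

// Illustrative shared allocation window: [heap_top, heap_end) is unallocated.
static std::atomic<uintptr_t> heap_top;   // cf. Universe::heap()->top_addr()
static uintptr_t              heap_end;   // cf. Universe::heap()->end_addr()

// Returns a freshly allocated block, or nullptr for the slow path (the code
// above jumps to slow_case instead).
void* inline_allocate(size_t size_in_bytes) {
  uintptr_t obj = heap_top.load(std::memory_order_relaxed);
  for (;;) {
    uintptr_t new_top = obj + size_in_bytes;
    if (new_top < obj || new_top > heap_end) {
      return nullptr;                 // wrapped around or past the end of the heap
    }
    // Publish the new top only if nobody raced us; on failure 'obj' is
    // reloaded with the current top and we retry, like the jcc back to retry.
    if (heap_top.compare_exchange_weak(obj, new_top)) {
      return reinterpret_cast<void*>(obj);
    }
  }
}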
+ pre_barrier(LIR_OprFact::address(addr), false, NULL); __ move(data, addr); assert(src->is_register(), "must be register"); // Seems to be a precise address diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/c1_Runtime1_x86.cpp --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1385,6 +1385,136 @@ } break; +#ifndef SERIALGC + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ movl(rax, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax); + __ should_not_reach_here(); + break; + } + + __ pushl(rax); + __ pushl(rdx); + + const Register pre_val = rax; + const Register thread = rax; + const Register tmp = rdx; + + __ get_thread(thread); + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? + + __ cmpl(queue_index, 0); + __ jcc(Assembler::equal, runtime); + __ subl(queue_index, wordSize); + __ movl(tmp, buffer); + __ addl(tmp, queue_index); + // prev_val (rax) + f.load_argument(0, pre_val); + __ movl(Address(tmp, 0), pre_val); + __ jmp(done); + + __ bind(runtime); + // load the pre-value + __ pushl(rcx); + f.load_argument(0, rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread); + __ popl(rcx); + + __ bind(done); + __ popl(rdx); + __ popl(rax); + } + break; + + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + + // arg0: store_address + Address store_addr(rbp, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regsion. + // Must check to see if card is already dirty + + const Register card_index = rdx; + + const Register thread = rax; + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + __ pushl(rax); + __ pushl(rdx); + + __ movl(card_index, store_addr); + __ get_thread(rax); + __ shrl(card_index, CardTableModRefBS::card_shift); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + ExternalAddress cardtable((address)ct->byte_map_base); + Address index(noreg, card_index, Address::times_1); + const Register card_addr = rdx; + __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index))); + __ cmpb(Address(card_addr, 0), 0); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
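Taken together, MacroAssembler::g1_write_barrier_post earlier in the patch and the g1_post_barrier_slow stub below implement G1's post-write barrier: a store of a non-NULL value that crosses heap regions dirties the card for the store address and logs that card in the thread's dirty card queue. A compact C++ sketch of the full sequence, with illustrative constants and the runtime call stubbed out:

#include <cstddef>
#include <cstdint>

const unsigned kLogOfHRGrainBytes = 20;  // region size, 1 MB assumed here
const unsigned kCardShift         = 9;   // CardTableModRefBS::card_shift

struct DirtyCardQueue {                  // stand-in for the thread's PtrQueue
  size_t index;                          // byte offset of the next free slot
  void** buffer;
};

inline void g1_wb_post_slow(signed char* /*card*/, DirtyCardQueue* /*q*/) {
  // Runtime fallback (SharedRuntime::g1_wb_post in the patch); elided here.
}

inline void g1_write_barrier_post(void* store_addr, void* new_val,
                                  signed char* card_table_base,  // jbyte* map
                                  DirtyCardQueue* q) {
  uintptr_t a = reinterpret_cast<uintptr_t>(store_addr);
  uintptr_t v = reinterpret_cast<uintptr_t>(new_val);
  if (((a ^ v) >> kLogOfHRGrainBytes) == 0) return;  // same region: no work
  if (new_val == nullptr) return;                    // storing NULL: no work
  signed char* card = card_table_base + (a >> kCardShift);
  if (*card == 0) return;          // already dirty (0 is the dirty value here)
  *card = 0;                       // dirty the card
  if (q->index == 0) {             // queue full: hand the card to the runtime
    g1_wb_post_slow(card, q);
    return;
  }
  q->index -= sizeof(void*);       // log the card in the thread-local buffer
  *reinterpret_cast<void**>(
      reinterpret_cast<char*>(q->buffer) + q->index) = card;
}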
+ + __ movb(Address(card_addr, 0), 0); + + __ cmpl(queue_index, 0); + __ jcc(Assembler::equal, runtime); + __ subl(queue_index, wordSize); + + const Register buffer_addr = rbx; + __ pushl(rbx); + + __ movl(buffer_addr, buffer); + __ addl(buffer_addr, queue_index); + __ movl(Address(buffer_addr, 0), card_addr); + __ popl(rbx); + __ jmp(done); + + __ bind(runtime); + __ pushl(rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + __ popl(rcx); + + __ bind(done); + __ popl(rdx); + __ popl(rax); + + } + break; +#endif // !SERIALGC + default: { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); __ movl(rax, (int)id); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/interp_masm_x86_64.cpp --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -35,8 +35,13 @@ // Note: No need to save/restore bcp & locals (r13 & r14) pointer // since these are callee saved registers and no blocking/ // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use esi/edi as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. #ifdef ASSERT - save_bcp(); { Label L; cmpq(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int)NULL_WORD); @@ -49,24 +54,9 @@ // super call MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); // interpreter specific -#ifdef ASSERT - { - Label L; - cmpq(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r13 not callee saved?"); - bind(L); - } - { - Label L; - cmpq(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize)); - jcc(Assembler::equal, L); - stop("InterpreterMacroAssembler::call_VM_leaf_base:" - " r14 not callee saved?"); - bind(L); - } -#endif + // Used to ASSERT that r13/r14 were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save thme here (see note above) the assert is invalid. 
} void InterpreterMacroAssembler::call_VM_base(Register oop_result, diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -711,7 +711,6 @@ // end - element count void gen_write_ref_array_pre_barrier(Register start, Register count) { assert_different_registers(start, count); -#if 0 // G1 only BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: @@ -720,8 +719,8 @@ __ pushad(); // push registers __ pushl(count); __ pushl(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); + __ addl(rsp, wordSize * 2); __ popad(); } break; @@ -733,7 +732,6 @@ ShouldNotReachHere(); } -#endif // 0 - G1 only } @@ -749,20 +747,18 @@ BarrierSet* bs = Universe::heap()->barrier_set(); assert_different_registers(start, count); switch (bs->kind()) { -#if 0 // G1 only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pushad(); // push registers __ pushl(count); __ pushl(start); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); - __ addl(esp, wordSize * 2); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); + __ addl(rsp, wordSize * 2); __ popad(); } break; -#endif // 0 G1 only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -1377,9 +1373,9 @@ Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); // Copy from low to high addresses, indexed from the end of each array. + gen_write_ref_array_pre_barrier(to, count); __ leal(end_from, end_from_addr); __ leal(end_to, end_to_addr); - gen_write_ref_array_pre_barrier(to, count); assert(length == count, ""); // else fix next line: __ negl(count); // negate and test the length __ jccb(Assembler::notZero, L_load_element); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1152,18 +1152,26 @@ // Destroy no registers! // void gen_write_ref_array_pre_barrier(Register addr, Register count) { -#if 0 // G1 - only - assert_different_registers(addr, c_rarg1); - assert_different_registers(count, c_rarg0); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: { __ pushaq(); // push registers - __ movq(c_rarg0, addr); - __ movq(c_rarg1, count); - __ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre)); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! 
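The register shuffle above is only about avoiding clobbers: (addr, count) must end up in (c_rarg0, c_rarg1), and if the inputs happen to sit exactly crossed in those registers a pair of plain moves would overwrite one value before it is read, hence the xchgq. The same hazard reduced to plain C++, with references standing in for registers (illustrative only):

#include <utility>

// Move the logical values (addr, count) into the argument slots arg0/arg1
// without losing either, even when the sources alias the destinations.
void marshal_args(long& arg0, long& arg1, long& addr, long& count) {
  if (&count == &arg0) {
    if (&addr == &arg1) {
      std::swap(arg0, arg1);   // exactly backwards: swapping is the only safe order
    } else {
      arg1 = count;            // read count out of arg0 before overwriting it
      arg0 = addr;
    }
  } else {
    arg0 = addr;               // safe: count does not live in arg0
    arg1 = count;
  }
}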
+ __ xchgq(c_rarg1, c_rarg0); + } else { + __ movq(c_rarg1, count); + __ movq(c_rarg0, addr); + } + + } else { + __ movq(c_rarg0, addr); + __ movq(c_rarg1, count); + } + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre))); __ popaq(); } break; @@ -1171,11 +1179,10 @@ case BarrierSet::CardTableExtension: case BarrierSet::ModRef: break; - default : + default: ShouldNotReachHere(); } -#endif // 0 G1 - only } // @@ -1192,7 +1199,6 @@ assert_different_registers(start, end, scratch); BarrierSet* bs = Universe::heap()->barrier_set(); switch (bs->kind()) { -#if 0 // G1 - only case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: @@ -1205,11 +1211,10 @@ __ shrq(scratch, LogBytesPerWord); __ movq(c_rarg0, start); __ movq(c_rarg1, scratch); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post))); __ popaq(); } break; -#endif // 0 G1 - only case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: { @@ -1230,8 +1235,12 @@ __ decrementq(count); __ jcc(Assembler::greaterEqual, L_loop); } - } - } + break; + default: + ShouldNotReachHere(); + + } + } // Copy big chunks forward // diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/templateTable_x86_32.cpp --- a/src/cpu/x86/vm/templateTable_x86_32.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -107,6 +107,78 @@ //---------------------------------------------------------------------------------------------------- // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movl(rdx, obj.base()); + } + } else { + __ leal(rdx, obj); + } + __ get_thread(rcx); + __ save_bcp(); + __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg); + + // Do the actual store + // noreg means NULL + if (val == noreg) { + __ movl(Address(rdx, 0), NULL_WORD); + // No post barrier for NULL + } else { + __ movl(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi); + } + __ restore_bcp(); + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leal(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ movl(obj, NULL_WORD); + } else { + __ movl(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); return Address(rsi, offset); @@ -872,6 +944,8 @@ __ movl(rax, at_tos()); // Value __ movl(rcx, at_tos_p1()); // Index __ movl(rdx, at_tos_p2()); // Array + + 
Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); index_check_without_pop(rdx, rcx); // kills rbx, // do array store check - check for NULL value first __ testl(rax, rax); @@ -883,7 +957,7 @@ __ movl(rax, Address(rdx, oopDesc::klass_offset_in_bytes())); __ movl(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array+index*4+12 into a single register. Frees ECX. - __ leal(rdx, Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ leal(rdx, element_address); // Generate subtype check. Blows ECX. Resets EDI to locals. // Superklass in EAX. Subklass in EBX. @@ -895,15 +969,20 @@ // Come here on success __ bind(ok_is_subtype); - __ movl(rax, at_rsp()); // Value - __ movl(Address(rdx, 0), rax); - __ store_check(rdx); - __ jmpb(done); + + // Get the value to store + __ movl(rax, at_rsp()); + // and store it with appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); + + __ jmp(done); // Have a NULL in EAX, EDX=array, ECX=index. Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ movl(Address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax); + + // Store NULL, (noreg means NULL to do_oop_store) + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -1506,7 +1585,7 @@ // compute return address as bci in rax, __ leal(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset()))); __ subl(rax, Address(rcx, methodOopDesc::const_offset())); - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in rsi by the displacement in EDX __ addl(rsi, rdx); // Push return address __ push_i(rax); @@ -1517,7 +1596,7 @@ // Normal (non-jsr) branch handling - // Adjust the bcp in ESI by the displacement in EDX + // Adjust the bcp in rsi by the displacement in EDX __ addl(rsi, rdx); assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); @@ -2426,11 +2505,12 @@ __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ movl(lo, rax ); - __ store_check(obj, lo); // Need to mark card + do_oop_store(_masm, lo, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx); } + __ jmp(Done); __ bind(notObj); @@ -2638,14 +2718,18 @@ case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax); break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } Label done; volatile_barrier( ); - __ jmpb(done); + // Barriers are so large that short branch doesn't reach! + __ jmp(done); // Same code as above, but don't need rdx to test for volatile. 
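do_oop_store's precise flag mirrors the two card-marking policies already in use: aastore passes precise == true and marks the card of the exact element address, while putfield passes precise == false, since for field stores dirtying the card that covers the object header is sufficient; that is also why the address only has to be flattened in the precise case. A small worked example of the difference, assuming 512-byte cards:

#include <cstdint>
#include <cstdio>

const unsigned kCardShift = 9;   // 512-byte cards (CardTableModRefBS::card_shift)

int main() {
  uintptr_t obj   = 0x10000;     // object header address (illustrative)
  uintptr_t field = obj + 600;   // an element/field inside the object

  // Imprecise marking dirties the card of the object start; precise marking
  // dirties the card of the exact store location. They differ whenever the
  // object spans a card boundary, as it does here.
  std::printf("imprecise card: %lu\n", (unsigned long)(obj   >> kCardShift));  // 128
  std::printf("precise   card: %lu\n", (unsigned long)(field >> kCardShift));  // 129
}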
__ bind(notVolatile); @@ -2664,7 +2748,10 @@ case Bytecodes::_fast_lputfield: __ movl(hi, rdx); __ movl(lo, rax); break; case Bytecodes::_fast_fputfield: __ fstp_s(lo); break; case Bytecodes::_fast_dputfield: __ fstp_d(lo); break; - case Bytecodes::_fast_aputfield: __ movl(lo, rax); __ store_check(rcx, lo); break; + case Bytecodes::_fast_aputfield: { + do_oop_store(_masm, lo, rax, _bs->kind(), false); + break; + } default: ShouldNotReachHere(); } @@ -3019,8 +3106,6 @@ Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress heap_top((address)Universe::heap()->top_addr()); - __ get_cpool_and_tags(rcx, rax); // get instanceKlass __ movl(rcx, Address(rcx, rdx, Address::times_4, sizeof(constantPoolOopDesc))); @@ -3077,6 +3162,8 @@ if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress heap_top((address)Universe::heap()->top_addr()); + Label retry; __ bind(retry); __ mov32(rax, heap_top); diff -r 0b27f3512f9e -r 37f87013dfd8 src/cpu/x86/vm/templateTable_x86_64.cpp --- a/src/cpu/x86/vm/templateTable_x86_64.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -113,6 +113,69 @@ // Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == rax, "parameter is just for looks"); + switch (barrier) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != rdx) { + __ movq(rdx, obj.base()); + } + } else { + __ leaq(rdx, obj); + } + __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg); + if (val == noreg) { + __ store_heap_oop(Address(rdx, 0), NULL_WORD); + } else { + __ store_heap_oop(Address(rdx, 0), val); + __ g1_write_barrier_post(rdx, val, r8, rbx); + } + + } + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ leaq(rdx, obj); + __ store_check(rdx); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop(obj, NULL_WORD); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} Address TemplateTable::at_bcp(int offset) { assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); @@ -558,8 +621,8 @@ // rdx: array index_check(rdx, rax); // kills rbx __ load_heap_oop(rax, Address(rdx, rax, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + UseCompressedOops ? Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT))); } void TemplateTable::baload() { @@ -864,6 +927,11 @@ __ movq(rax, at_tos()); // value __ movl(rcx, at_tos_p1()); // index __ movq(rdx, at_tos_p2()); // array + + Address element_address(rdx, rcx, + UseCompressedOops? 
Address::times_4 : Address::times_8, + arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + index_check(rdx, rcx); // kills rbx // do array store check - check for NULL value first __ testq(rax, rax); @@ -877,9 +945,7 @@ sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes())); // Compress array + index*oopSize + 12 into a single register. Frees rcx. - __ leaq(rdx, Address(rdx, rcx, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ leaq(rdx, element_address); // Generate subtype check. Blows rcx, rdi // Superklass in rax. Subklass in rbx. @@ -891,18 +957,20 @@ // Come here on success __ bind(ok_is_subtype); - __ movq(rax, at_tos()); // Value - __ store_heap_oop(Address(rdx, 0), rax); - __ store_check(rdx); + + // Get the value we will store + __ movq(rax, at_tos()); + + // Now store using the appropriate barrier + do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true); __ jmp(done); // Have a NULL in rax, rdx=array, ecx=index. Store NULL at ary[idx] __ bind(is_null); __ profile_null_seen(rbx); - __ store_heap_oop(Address(rdx, rcx, - UseCompressedOops ? Address::times_4 : Address::times_8, - arrayOopDesc::base_offset_in_bytes(T_OBJECT)), - rax); + + // Store a NULL + do_oop_store(_masm, element_address, noreg, _bs->kind(), true); // Pop stack arguments __ bind(done); @@ -2394,8 +2462,10 @@ // atos __ pop(atos); if (!is_static) pop_and_check_object(obj); - __ store_heap_oop(field, rax); - __ store_check(obj, field); // Need to mark card + + // Store into the field + do_oop_store(_masm, field, rax, _bs->kind(), false); + if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx); } @@ -2582,8 +2652,7 @@ // access field switch (bytecode()) { case Bytecodes::_fast_aputfield: - __ store_heap_oop(field, rax); - __ store_check(rcx, field); + do_oop_store(_masm, field, rax, _bs->kind(), false); break; case Bytecodes::_fast_lputfield: __ movq(field, rax); @@ -2789,7 +2858,7 @@ __ andl(recv, 0xFF); if (TaggedStackInterpreter) __ shll(recv, 1); // index*2 __ movq(recv, Address(rsp, recv, Address::times_8, - -Interpreter::expr_offset_in_bytes(1))); + -Interpreter::expr_offset_in_bytes(1))); __ verify_oop(recv); } @@ -3042,8 +3111,6 @@ Label initialize_header; Label initialize_object; // including clearing the fields Label allocate_shared; - ExternalAddress top((address)Universe::heap()->top_addr()); - ExternalAddress end((address)Universe::heap()->end_addr()); __ get_cpool_and_tags(rsi, rax); // get instanceKlass @@ -3104,6 +3171,9 @@ if (allow_shared_alloc) { __ bind(allocate_shared); + ExternalAddress top((address)Universe::heap()->top_addr()); + ExternalAddress end((address)Universe::heap()->end_addr()); + const Register RtopAddr = rscratch1; const Register RendAddr = rscratch2; diff -r 0b27f3512f9e -r 37f87013dfd8 src/os/linux/vm/os_linux.cpp --- a/src/os/linux/vm/os_linux.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/os/linux/vm/os_linux.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1261,6 +1261,17 @@ return (1000 * 1000); } +// For now, we say that linux does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). 
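The vtime hooks being added here are deliberately conservative: Linux and Windows report supports_vtime() == false and let elapsedVTime() fall back to wall-clock elapsedTime(), while Solaris enables per-LWP microstate accounting through /proc and reads gethrvtime(). Purely as an illustration of what a Linux implementation could look like (this is not what the patch does), per-thread CPU time is available from clock_gettime:

#include <time.h>

// Hypothetical Linux sketch, not part of the patch: CLOCK_THREAD_CPUTIME_ID
// reports the CPU time consumed by the calling thread.
double elapsed_thread_cpu_seconds() {
  struct timespec ts;
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) {
    return 0.0;   // unavailable; a caller would fall back to elapsed wall time
  }
  return (double)ts.tv_sec + (double)ts.tv_nsec / 1e9;
}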
+ +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { timeval time; int status = gettimeofday(&time, NULL); diff -r 0b27f3512f9e -r 37f87013dfd8 src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1691,6 +1691,40 @@ } } +bool os::supports_vtime() { return true; } + +bool os::enable_vtime() { + int fd = open("/proc/self/ctl", O_WRONLY); + if (fd == -1) + return false; + + long cmd[] = { PCSET, PR_MSACCT }; + int res = write(fd, cmd, sizeof(long) * 2); + close(fd); + if (res != sizeof(long) * 2) + return false; + + return true; +} + +bool os::vtime_enabled() { + int fd = open("/proc/self/status", O_RDONLY); + if (fd == -1) + return false; + + pstatus_t status; + int res = read(fd, (void*) &status, sizeof(pstatus_t)); + close(fd); + if (res != sizeof(pstatus_t)) + return false; + + return status.pr_flags & PR_MSACCT; +} + +double os::elapsedVTime() { + return (double)gethrvtime() / (double)hrtime_hz; +} + // Used internally for comparisons only // getTimeMillis guaranteed to not move backwards on Solaris jlong getTimeMillis() { @@ -2661,7 +2695,7 @@ return bottom; } -// Detect the topology change. Typically happens during CPU pluggin-unplugging. +// Detect the topology change. Typically happens during CPU plugging-unplugging. bool os::numa_topology_changed() { int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie()); if (is_stale != -1 && is_stale) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/os/windows/vm/os_windows.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -737,6 +737,17 @@ return result; } +// For now, we say that Windows does not support vtime. I have no idea +// whether it can actually be made to (DLD, 9/13/05). + +bool os::supports_vtime() { return false; } +bool os::enable_vtime() { return false; } +bool os::vtime_enabled() { return false; } +double os::elapsedVTime() { + // better than nothing, but not much + return elapsedTime(); +} + jlong os::javaTimeMillis() { if (UseFakeTimers) { return fake_time++; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/adlc/formssel.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -3767,6 +3767,10 @@ int MatchRule::is_ideal_copy() const { if( _rChild ) { const char *opType = _rChild->_opType; +#if 1 + if( strcmp(opType,"CastIP")==0 ) + return 1; +#else if( strcmp(opType,"CastII")==0 ) return 1; // Do not treat *CastPP this way, because it @@ -3786,6 +3790,7 @@ // return 1; //if( strcmp(opType,"CastP2X")==0 ) // return 1; +#endif } if( is_chain_rule(_AD.globalNames()) && _lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 ) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_CodeStubs.hpp --- a/src/share/vm/c1/c1_CodeStubs.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_CodeStubs.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -482,3 +482,81 @@ virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); } #endif // PRODUCT }; + +////////////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +// Code stubs for Garbage-First barriers. 
+class G1PreBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _pre_val; + LIR_PatchCode _patch_code; + CodeEmitInfo* _info; + + public: + // pre_val (a temporary register) must be a register; + // addr (the address of the field to be read) must be a LIR_Address + G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) : + _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info) + { + assert(_pre_val->is_register(), "should be temporary register"); + assert(_addr->is_address(), "should be the address of the field"); + } + + LIR_Opr addr() const { return _addr; } + LIR_Opr pre_val() const { return _pre_val; } + LIR_PatchCode patch_code() const { return _patch_code; } + CodeEmitInfo* info() const { return _info; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast + // path + if (_info != NULL) + visitor->do_slow_case(_info); + else + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_temp(_pre_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); } +#endif // PRODUCT +}; + +class G1PostBarrierStub: public CodeStub { + private: + LIR_Opr _addr; + LIR_Opr _new_val; + + static jbyte* _byte_map_base; + static jbyte* byte_map_base_slow(); + static jbyte* byte_map_base() { + if (_byte_map_base == NULL) { + _byte_map_base = byte_map_base_slow(); + } + return _byte_map_base; + } + + public: + // addr (the address of the object head) and new_val must be registers. + G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { } + + LIR_Opr addr() const { return _addr; } + LIR_Opr new_val() const { return _new_val; } + + virtual void emit_code(LIR_Assembler* e); + virtual void visit(LIR_OpVisitState* visitor) { + // don't pass in the code emit info since it's processed in the fast path + visitor->do_slow_case(); + visitor->do_input(_addr); + visitor->do_input(_new_val); + } +#ifndef PRODUCT + virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); } +#endif // PRODUCT +}; + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////////////// diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRAssembler.cpp --- a/src/share/vm/c1/c1_LIRAssembler.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -74,6 +74,7 @@ LIR_Assembler::LIR_Assembler(Compilation* c): _compilation(c) , _masm(c->masm()) + , _bs(Universe::heap()->barrier_set()) , _frame_map(c->frame_map()) , _current_block(NULL) , _pending_non_safepoint(NULL) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRAssembler.hpp --- a/src/share/vm/c1/c1_LIRAssembler.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -24,11 +24,13 @@ class Compilation; class ScopeValue; +class BarrierSet; class LIR_Assembler: public CompilationResourceObj { private: C1_MacroAssembler* _masm; CodeStubList* _slow_case_stubs; + BarrierSet* _bs; Compilation* _compilation; FrameMap* _frame_map; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -285,16 +285,7 @@ void LIRGenerator::init() { - BarrierSet* bs = 
Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); - -#ifdef _LP64 - _card_table_base = new LIR_Const((jlong)ct->byte_map_base); -#else - _card_table_base = new LIR_Const((jint)ct->byte_map_base); -#endif + _bs = Universe::heap()->barrier_set(); } @@ -1239,8 +1230,37 @@ // Various barriers +void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + // Do the pre-write barrier, if any. + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info); + break; +#endif // SERIALGC + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + // No pre barriers + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + // No pre barriers + break; + default : + ShouldNotReachHere(); + + } +} + void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { - switch (Universe::heap()->barrier_set()->kind()) { + switch (_bs->kind()) { +#ifndef SERIALGC + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + G1SATBCardTableModRef_post_barrier(addr, new_val); + break; +#endif // SERIALGC case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: CardTableModRef_post_barrier(addr, new_val); @@ -1254,11 +1274,120 @@ } } +//////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) { + if (G1DisablePreBarrier) return; + + // First we test whether marking is in progress. + BasicType flag_type; + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + flag_type = T_INT; + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + flag_type = T_BYTE; + } + LIR_Opr thrd = getThreadPointer(); + LIR_Address* mark_active_flag_addr = + new LIR_Address(thrd, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + flag_type); + // Read the marking-in-progress flag. + LIR_Opr flag_val = new_register(T_INT); + __ load(mark_active_flag_addr, flag_val); + + LabelObj* start_store = new LabelObj(); + + LIR_PatchCode pre_val_patch_code = + patch ? lir_patch_normal : lir_patch_none; + + LIR_Opr pre_val = new_register(T_OBJECT); + + __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); + if (!addr_opr->is_address()) { + assert(addr_opr->is_register(), "must be"); + addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT)); + } + CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code, + info); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { + if (G1DisablePostBarrier) return; + + // If the "new_val" is a constant NULL, no barrier is necessary. 
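Both the assembly barriers earlier in the patch and the LIR sequence generated just below rely on the same region-crossing test: heap regions are 2^LogOfHRGrainBytes bytes and aligned, so two addresses fall in the same region exactly when xor-ing them and shifting right by that amount gives zero. A tiny worked example, assuming 1 MB regions:

#include <cstdint>
#include <cstdio>

const unsigned kLogOfHRGrainBytes = 20;   // 1 MB regions assumed for the example

bool same_region(uintptr_t a, uintptr_t b) {
  return ((a ^ b) >> kLogOfHRGrainBytes) == 0;
}

int main() {
  uintptr_t field   = 0x40012345;   // address being stored into
  uintptr_t val_in  = 0x400ffff0;   // new value inside the same 1 MB region
  uintptr_t val_out = 0x40112345;   // new value one region higher

  std::printf("%d %d\n", same_region(field, val_in),    // 1: post barrier skipped
                         same_region(field, val_out));  // 0: card must be dirtied
}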
+ if (new_val->is_constant() && + new_val->as_constant_ptr()->as_jobject() == NULL) return; + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + if (new_val->is_constant()) { + __ move(new_val, new_val_reg); + } else { + __ leal(new_val, new_val_reg); + } + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + if (addr->is_address()) { + LIR_Address* address = addr->as_address_ptr(); + LIR_Opr ptr = new_pointer_register(); + if (!address->index()->is_valid() && address->disp() == 0) { + __ move(address->base(), ptr); + } else { + assert(address->disp() != max_jint, "lea doesn't support patched addresses!"); + __ leal(addr, ptr); + } + addr = ptr; + } + assert(addr->is_register(), "must be a register at this point"); + + LIR_Opr xor_res = new_pointer_register(); + LIR_Opr xor_shift_res = new_pointer_register(); + + if (TwoOperandLIRForm ) { + __ move(addr, xor_res); + __ logical_xor(xor_res, new_val, xor_res); + __ move(xor_res, xor_shift_res); + __ unsigned_shift_right(xor_shift_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } else { + __ logical_xor(addr, new_val, xor_res); + __ unsigned_shift_right(xor_res, + LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes), + xor_shift_res, + LIR_OprDesc::illegalOpr()); + } + + if (!new_val->is_register()) { + LIR_Opr new_val_reg = new_pointer_register(); + __ leal(new_val, new_val_reg); + new_val = new_val_reg; + } + assert(new_val->is_register(), "must be a register at this point"); + + __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); + + CodeStub* slow = new G1PostBarrierStub(addr, new_val); + __ branch(lir_cond_notEqual, T_INT, slow); + __ branch_destination(slow->continuation()); +} + +#endif // SERIALGC +//////////////////////////////////////////////////////////////////////// + void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); - LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base); + assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code"); + LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base); if (addr->is_address()) { LIR_Address* address = addr->as_address_ptr(); LIR_Opr ptr = new_register(T_OBJECT); @@ -1388,6 +1517,13 @@ __ membar_release(); } + if (is_oop) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(address), + needs_patching, + (info ? 
new CodeEmitInfo(info) : NULL)); + } + if (is_volatile) { assert(!needs_patching && x->is_loaded(), "how do we know it's volatile if it's not loaded"); @@ -1398,7 +1534,12 @@ } if (is_oop) { +#ifdef PRECISE_CARDMARK + // Precise cardmarks don't work + post_barrier(LIR_OprFact::address(address), value.result()); +#else post_barrier(object.result(), value.result()); +#endif // PRECISE_CARDMARK } if (is_volatile && os::is_MP()) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_LIRGenerator.hpp --- a/src/share/vm/c1/c1_LIRGenerator.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_LIRGenerator.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -145,6 +145,7 @@ // only the classes below belong in the same file class LIRGenerator: public InstructionVisitor, public BlockClosure { + private: Compilation* _compilation; ciMethod* _method; // method that we are compiling @@ -154,6 +155,7 @@ Values _instruction_for_operand; BitMap2D _vreg_flags; // flags which can be set on a per-vreg basis LIR_List* _lir; + BarrierSet* _bs; LIRGenerator* gen() { return this; @@ -174,8 +176,6 @@ LIR_OprList _reg_for_constants; Values _unpinned_constants; - LIR_Const* _card_table_base; - friend class PhiResolver; // unified bailout support @@ -196,8 +196,6 @@ LIR_Opr load_constant(Constant* x); LIR_Opr load_constant(LIR_Const* constant); - LIR_Const* card_table_base() const { return _card_table_base; } - void set_result(Value x, LIR_Opr opr) { assert(opr->is_valid(), "must set to valid value"); assert(x->operand()->is_illegal(), "operand should never change"); @@ -253,12 +251,17 @@ // generic interface + void pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); // specific implementations + // pre barriers + + void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info); // post barriers + void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_Runtime1.cpp --- a/src/share/vm/c1/c1_Runtime1.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_Runtime1.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -168,6 +168,8 @@ switch (id) { // These stubs don't need to have an oopmap case dtrace_object_alloc_id: + case g1_pre_barrier_slow_id: + case g1_post_barrier_slow_id: case slow_subtype_check_id: case fpu2long_stub_id: case unwind_exception_id: diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_Runtime1.hpp --- a/src/share/vm/c1/c1_Runtime1.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_Runtime1.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -56,6 +56,8 @@ stub(access_field_patching) \ stub(load_klass_patching) \ stub(jvmti_exception_throw) \ + stub(g1_pre_barrier_slow) \ + stub(g1_post_barrier_slow) \ stub(fpu2long_stub) \ stub(counter_overflow) \ last_entry(number_of_ids) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/c1/c1_globals.hpp --- a/src/share/vm/c1/c1_globals.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/c1/c1_globals.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -213,9 +213,6 @@ develop(bool, UseFastLocking, true, \ "Use fast inlined locking code") \ \ - product(bool, FastTLABRefill, true, \ - "Use fast TLAB refill code") \ - \ develop(bool, UseSlowPath, false, \ "For debugging: test slow cases by always using them") \ \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/compiler/methodLiveness.cpp --- a/src/share/vm/compiler/methodLiveness.cpp Wed 
Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/compiler/methodLiveness.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -76,8 +76,9 @@ BitCounter() : _count(0) {} // Callback when bit in map is set - virtual void do_bit(size_t offset) { + virtual bool do_bit(size_t offset) { _count++; + return true; } int count() { @@ -467,7 +468,7 @@ bci = 0; } - MethodLivenessResult answer(NULL,0); + MethodLivenessResult answer((uintptr_t*)NULL,0); if (_block_count > 0) { if (TimeLivenessAnalysis) _time_total.start(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/compiler/methodLiveness.hpp --- a/src/share/vm/compiler/methodLiveness.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/compiler/methodLiveness.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -29,7 +29,7 @@ bool _is_valid; public: - MethodLivenessResult(uintptr_t* map, idx_t size_in_bits) + MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits) : BitMap(map, size_in_bits) , _is_valid(false) {} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -790,7 +790,7 @@ } -HeapWord* CompactibleFreeListSpace::block_start(const void* p) const { +HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const { NOT_PRODUCT(verify_objects_initialized()); return _bt.block_start(p); } @@ -2285,9 +2285,9 @@ } void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const { - guarantee(size % 2 == 0, "Odd slots should be empty"); - for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL; - fc = fc->next()) { + FreeChunk* fc = _indexedFreeList[size].head(); + guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty"); + for (; fc != NULL; fc = fc->next()) { guarantee(fc->size() == size, "Size inconsistency"); guarantee(fc->isFree(), "!free?"); guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -502,7 +502,7 @@ void blk_iterate(BlkClosure* cl); void blk_iterate_careful(BlkClosureCareful* cl); - HeapWord* block_start(const void* p) const; + HeapWord* block_start_const(const void* p) const; HeapWord* block_start_careful(const void* p) const; size_t block_size(const HeapWord* p) const; size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -2751,13 +2751,14 @@ public: VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {} - void do_bit(size_t offset) { + bool do_bit(size_t offset) { HeapWord* addr = _marks->offsetToHeapWord(offset); if (!_marks->isMarked(addr)) { oop(addr)->print(); gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr); _failed = true; } + 
return true; } bool failed() { return _failed; } @@ -4645,8 +4646,11 @@ startTimer(); sample_eden(); // Get and clear dirty region from card table - dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean( - MemRegion(nextAddr, endAddr)); + dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset( + MemRegion(nextAddr, endAddr), + true, + CardTableModRefBS::precleaned_card_val()); + assert(dirtyRegion.start() >= nextAddr, "returned region inconsistent?"); } @@ -5414,8 +5418,8 @@ &mrias_cl); { TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty); - // Iterate over the dirty cards, marking them precleaned, and - // setting the corresponding bits in the mod union table. + // Iterate over the dirty cards, setting the corresponding bits in the + // mod union table. { ModUnionClosure modUnionClosure(&_modUnionTable); _ct->ct_bs()->dirty_card_iterate( @@ -6187,7 +6191,7 @@ // bit vector itself. That is done by a separate call CMSBitMap::allocate() // further below. CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name): - _bm(NULL,0), + _bm(), _shifter(shifter), _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL) { @@ -6212,7 +6216,7 @@ } assert(_virtual_space.committed_size() == brs.size(), "didn't reserve backing store for all of CMS bit map?"); - _bm.set_map((uintptr_t*)_virtual_space.low()); + _bm.set_map((BitMap::bm_word_t*)_virtual_space.low()); assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= _bmWordSize, "inconsistency in bit map sizing"); _bm.set_size(_bmWordSize >> _shifter); @@ -6853,10 +6857,10 @@ // Should revisit to see if this should be restructured for // greater efficiency. -void MarkFromRootsClosure::do_bit(size_t offset) { +bool MarkFromRootsClosure::do_bit(size_t offset) { if (_skipBits > 0) { _skipBits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bitMap->startWord() + offset; @@ -6896,10 +6900,11 @@ } // ...else the setting of klass will dirty the card anyway. } DEBUG_ONLY(}) - return; + return true; } } scanOopsInOop(addr); + return true; } // We take a break if we've been at this for a while, @@ -7033,10 +7038,10 @@ // Should revisit to see if this should be restructured for // greater efficiency. -void Par_MarkFromRootsClosure::do_bit(size_t offset) { +bool Par_MarkFromRootsClosure::do_bit(size_t offset) { if (_skip_bits > 0) { _skip_bits--; - return; + return true; } // convert offset into a HeapWord* HeapWord* addr = _bit_map->startWord() + offset; @@ -7051,10 +7056,11 @@ if (p->klass() == NULL || !p->is_parsable()) { // in the case of Clean-on-Enter optimization, redirty card // and avoid clearing card by increasing the threshold. - return; + return true; } } scan_oops_in_oop(addr); + return true; } void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) { @@ -7177,7 +7183,7 @@ // Should revisit to see if this should be restructured for // greater efficiency. 
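A recurring change in this patch is that BitMapClosure::do_bit now returns bool instead of void: the closure tells the iterator whether to keep going, so a bit-map walk can stop at the first interesting bit rather than visiting all of them (the CMS closures converted here all return true to keep scanning). A minimal sketch of that protocol with simplified types, not the HotSpot BitMap API:

#include <cstddef>
#include <vector>

// Simplified stand-ins for BitMapClosure and BitMap::iterate.
struct BitClosure {
  virtual bool do_bit(size_t offset) = 0;   // return false to stop the iteration
  virtual ~BitClosure() {}
};

// Applies the closure to every set bit; returns false if the closure aborted.
bool iterate(const std::vector<bool>& bits, BitClosure* cl) {
  for (size_t i = 0; i < bits.size(); ++i) {
    if (bits[i] && !cl->do_bit(i)) return false;
  }
  return true;
}

// Example: record the first set bit at or above a threshold, then stop.
struct FindFrom : BitClosure {
  size_t threshold;
  size_t found;
  explicit FindFrom(size_t t) : threshold(t), found(size_t(-1)) {}
  bool do_bit(size_t offset) {
    if (offset < threshold) return true;    // keep scanning
    found = offset;
    return false;                           // found one: abort the walk
  }
};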
-void MarkFromRootsVerifyClosure::do_bit(size_t offset) { +bool MarkFromRootsVerifyClosure::do_bit(size_t offset) { // convert offset into a HeapWord* HeapWord* addr = _verification_bm->startWord() + offset; assert(_verification_bm->endWord() && addr < _verification_bm->endWord(), @@ -7205,6 +7211,7 @@ new_oop->oop_iterate(&_pam_verify_closure); } assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition"); + return true; } PushAndMarkVerifyClosure::PushAndMarkVerifyClosure( @@ -7448,8 +7455,12 @@ // Grey object rescan during pre-cleaning and second checkpoint phases -- // the non-parallel version (the parallel version appears further below.) void PushAndMarkClosure::do_oop(oop obj) { - // If _concurrent_precleaning, ignore mark word verification - assert(obj->is_oop_or_null(_concurrent_precleaning), + // Ignore mark word verification. If during concurrent precleaning, + // the object monitor may be locked. If during the checkpoint + // phases, the object may already have been reached by a different + // path and may be at the end of the global overflow list (so + // the mark word may be NULL). + assert(obj->is_oop_or_null(true /* ignore mark word */), "expected an oop or NULL"); HeapWord* addr = (HeapWord*)obj; // Check if oop points into the CMS generation diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -1325,7 +1325,7 @@ CMSMarkStack* markStack, CMSMarkStack* revisitStack, bool should_yield, bool verifying = false); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); inline void do_yield_check(); @@ -1361,7 +1361,7 @@ CMSMarkStack* overflow_stack, CMSMarkStack* revisit_stack, bool should_yield); - void do_bit(size_t offset); + bool do_bit(size_t offset); inline void do_yield_check(); private: @@ -1409,7 +1409,7 @@ CMSBitMap* verification_bm, CMSBitMap* cms_bm, CMSMarkStack* mark_stack); - void do_bit(size_t offset); + bool do_bit(size_t offset); void reset(HeapWord* addr); }; @@ -1418,8 +1418,9 @@ // "empty" (i.e. the bit vector doesn't have any 1-bits). class FalseBitMapClosure: public BitMapClosure { public: - void do_bit(size_t offset) { + bool do_bit(size_t offset) { guarantee(false, "Should not have a 1 bit"); + return true; } }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,195 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A BufferingOops closure tries to separate out the cost of finding roots +// from the cost of applying closures to them. It maintains an array of +// ref-containing locations. Until the array is full, applying the closure +// to an oop* merely records that location in the array. Since this +// closure app cost is small, an elapsed timer can approximately attribute +// all of this cost to the cost of finding the roots. When the array fills +// up, the wrapped closure is applied to all elements, keeping track of +// this elapsed time of this process, and leaving the array empty. +// The caller must be sure to call "done" to process any unprocessed +// buffered entriess. + +class Generation; +class HeapRegion; + +class BufferingOopClosure: public OopClosure { +protected: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + OopClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + double start = os::elapsedTime(); + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + _oc->do_oop(*curr); + } + _buffer_curr = _buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + } + void done () { + if (_buffer_curr > _buffer) { + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopClosure (OopClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _closure_app_seconds(0.0) { } +}; + +class BufferingOopsInGenClosure: public OopsInGenClosure { + BufferingOopClosure _boc; + OopsInGenClosure* _oc; +public: + BufferingOopsInGenClosure(OopsInGenClosure *oc) : + _boc(oc), _oc(oc) {} + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + assert(generation()->is_in_reserved(p), "Must be in!"); + _boc.do_oop(p); + } + + void done() { + _boc.done(); + } + + double closure_app_seconds () { + return _boc.closure_app_seconds(); + } + + void set_generation(Generation* gen) { + OopsInGenClosure::set_generation(gen); + _oc->set_generation(gen); + } + + void reset_generation() { + // Make sure we finish the current work with the current generation. 
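As the header comment above explains, the buffering closures separate the cost of finding roots from the cost of applying the real closure: do_oop only records the location, and the actual work (and its timing) happens when the buffer is flushed, so an elapsed timer around root scanning attributes almost nothing to closure application. The same idea in a self-contained sketch, with std::function and a vector standing in for the OopClosure machinery; as in the real class, done() must be called to flush the tail:

#include <chrono>
#include <cstddef>
#include <functional>
#include <vector>

class BufferingClosure {
  static const size_t kBufferLength = 1024;
  std::vector<void**> _buffer;
  std::function<void(void**)> _work;       // the wrapped closure
  double _closure_app_seconds;

  void process_buffer() {
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    for (size_t i = 0; i < _buffer.size(); ++i) _work(_buffer[i]);
    _buffer.clear();
    _closure_app_seconds +=
        std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count();
  }

 public:
  explicit BufferingClosure(std::function<void(void**)> work)
      : _work(work), _closure_app_seconds(0.0) {
    _buffer.reserve(kBufferLength);
  }
  void do_oop(void** p) {
    _buffer.push_back(p);                        // cheap: just record the location
    if (_buffer.size() == kBufferLength) process_buffer();
  }
  void done() { if (!_buffer.empty()) process_buffer(); }   // flush the tail
  double closure_app_seconds() const { return _closure_app_seconds; }
};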
+ _boc.done(); + OopsInGenClosure::reset_generation(); + _oc->reset_generation(); + } + +}; + + +class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure { +private: + enum PrivateConstants { + BufferLength = 1024 + }; + + oop *_buffer[BufferLength]; + oop **_buffer_top; + oop **_buffer_curr; + + HeapRegion *_hr_buffer[BufferLength]; + HeapRegion **_hr_curr; + + OopsInHeapRegionClosure *_oc; + double _closure_app_seconds; + + void process_buffer () { + + assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer), + "the two lengths should be the same"); + + double start = os::elapsedTime(); + HeapRegion **hr_curr = _hr_buffer; + HeapRegion *hr_prev = NULL; + for (oop **curr = _buffer; curr < _buffer_curr; ++curr) { + HeapRegion *region = *hr_curr; + if (region != hr_prev) { + _oc->set_region(region); + hr_prev = region; + } + _oc->do_oop(*curr); + ++hr_curr; + } + _buffer_curr = _buffer; + _hr_curr = _hr_buffer; + _closure_app_seconds += (os::elapsedTime() - start); + } + +public: + virtual void do_oop(narrowOop *p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop *p) { + if (_buffer_curr == _buffer_top) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + + *_buffer_curr = p; + ++_buffer_curr; + *_hr_curr = _from; + ++_hr_curr; + } + void done () { + if (_buffer_curr > _buffer) { + assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr"); + process_buffer(); + } + } + double closure_app_seconds () { + return _closure_app_seconds; + } + BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) : + _oc(oc), + _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength), + _hr_curr(_hr_buffer), + _closure_app_seconds(0.0) { } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/collectionSetChooser.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,409 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_collectionSetChooser.cpp.incl" + +CSetChooserCache::CSetChooserCache() { + for (int i = 0; i < CacheLength; ++i) + _cache[i] = NULL; + clear(); +} + +void CSetChooserCache::clear() { + _occupancy = 0; + _first = 0; + for (int i = 0; i < CacheLength; ++i) { + HeapRegion *hr = _cache[i]; + if (hr != NULL) + hr->set_sort_index(-1); + _cache[i] = NULL; + } +} + +#ifndef PRODUCT +bool CSetChooserCache::verify() { + int index = _first; + HeapRegion *prev = NULL; + for (int i = 0; i < _occupancy; ++i) { + guarantee(_cache[index] != NULL, "cache entry should not be empty"); + HeapRegion *hr = _cache[index]; + guarantee(!hr->is_young(), "should not be young!"); + if (prev != NULL) { + guarantee(prev->gc_efficiency() >= hr->gc_efficiency(), + "cache should be correctly ordered"); + } + guarantee(hr->sort_index() == get_sort_index(index), + "sort index should be correct"); + index = trim_index(index + 1); + prev = hr; + } + + for (int i = 0; i < (CacheLength - _occupancy); ++i) { + guarantee(_cache[index] == NULL, "cache entry should be empty"); + index = trim_index(index + 1); + } + + guarantee(index == _first, "we should have reached where we started from"); + return true; +} +#endif // PRODUCT + +void CSetChooserCache::insert(HeapRegion *hr) { + assert(!is_full(), "cache should not be empty"); + hr->calc_gc_efficiency(); + + int empty_index; + if (_occupancy == 0) { + empty_index = _first; + } else { + empty_index = trim_index(_first + _occupancy); + assert(_cache[empty_index] == NULL, "last slot should be empty"); + int last_index = trim_index(empty_index - 1); + HeapRegion *last = _cache[last_index]; + assert(last != NULL,"as the cache is not empty, last should not be empty"); + while (empty_index != _first && + last->gc_efficiency() < hr->gc_efficiency()) { + _cache[empty_index] = last; + last->set_sort_index(get_sort_index(empty_index)); + empty_index = last_index; + last_index = trim_index(last_index - 1); + last = _cache[last_index]; + } + } + _cache[empty_index] = hr; + hr->set_sort_index(get_sort_index(empty_index)); + + ++_occupancy; + assert(verify(), "cache should be consistent"); +} + +HeapRegion *CSetChooserCache::remove_first() { + if (_occupancy > 0) { + assert(_cache[_first] != NULL, "cache should have at least one region"); + HeapRegion *ret = _cache[_first]; + _cache[_first] = NULL; + ret->set_sort_index(-1); + --_occupancy; + _first = trim_index(_first + 1); + assert(verify(), "cache should be consistent"); + return ret; + } else { + return NULL; + } +} + +// this is a bit expensive... but we expect that it should not be called +// to often. 
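The slot arithmetic used throughout this cache is compact enough to misread; the following is a minimal standalone sketch of it (illustrative only, not part of the changeset), assuming the same CacheLength of 16 and the same -index-2 sort-index encoding that collectionSetChooser.hpp declares later in this patch.

#include <cassert>

// Slots live in a circular buffer of CacheLength entries; adding CacheLength
// before taking the modulus keeps "index - 1" style inputs non-negative.
const int CacheLength = 16;
inline int trim_index(int index)     { return (index + CacheLength) % CacheLength; }

// A cached region advertises its slot as sort index -slot-2, so every cached
// region has a sort index strictly below -1, leaving -1 free to mean "not tracked".
inline int get_sort_index(int slot)  { return -slot - 2; }
inline int get_index(int sort_index) { return -sort_index - 2; }

int main() {
  assert(trim_index(-1) == CacheLength - 1);          // stepping left of slot 0 wraps
  assert(trim_index(CacheLength) == 0);               // stepping past the last slot wraps
  for (int slot = 0; slot < CacheLength; ++slot) {
    assert(get_sort_index(slot) < -1);                // distinguishable from "not tracked"
    assert(get_index(get_sort_index(slot)) == slot);  // encoding round-trips
  }
  return 0;
}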
+void CSetChooserCache::remove(HeapRegion *hr) { + assert(_occupancy > 0, "cache should not be empty"); + assert(hr->sort_index() < -1, "should already be in the cache"); + int index = get_index(hr->sort_index()); + assert(_cache[index] == hr, "index should be correct"); + int next_index = trim_index(index + 1); + int last_index = trim_index(_first + _occupancy - 1); + while (index != last_index) { + assert(_cache[next_index] != NULL, "should not be null"); + _cache[index] = _cache[next_index]; + _cache[index]->set_sort_index(get_sort_index(index)); + + index = next_index; + next_index = trim_index(next_index+1); + } + assert(index == last_index, "should have reached the last one"); + _cache[index] = NULL; + hr->set_sort_index(-1); + --_occupancy; + assert(verify(), "cache should be consistent"); +} + +static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) { + if (hr1 == NULL) { + if (hr2 == NULL) return 0; + else return 1; + } else if (hr2 == NULL) { + return -1; + } + if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1; + else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1; + else return 0; +} + +static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) { + return orderRegions(*hr1p, *hr2p); +} + +CollectionSetChooser::CollectionSetChooser() : + // The line below is the worst bit of C++ hackery I've ever written + // (Detlefs, 11/23). You should think of it as equivalent to + // "_regions(100, true)": initialize the growable array and inform it + // that it should allocate its elem array(s) on the C heap. The first + // argument, however, is actually a comma expression (new-expr, 100). + // The purpose of the new_expr is to inform the growable array that it + // is *already* allocated on the C heap: it uses the placement syntax to + // keep it from actually doing any allocation. 
+ _markedRegions((ResourceObj::operator new (sizeof(GrowableArray), + (void*)&_markedRegions, + ResourceObj::C_HEAP), + 100), + true), + _curMarkedIndex(0), + _numMarkedRegions(0), + _unmarked_age_1_returned_as_new(false), + _first_par_unreserved_idx(0) +{} + + + +#ifndef PRODUCT +bool CollectionSetChooser::verify() { + int index = 0; + guarantee(_curMarkedIndex <= _numMarkedRegions, + "_curMarkedIndex should be within bounds"); + while (index < _curMarkedIndex) { + guarantee(_markedRegions.at(index++) == NULL, + "all entries before _curMarkedIndex should be NULL"); + } + HeapRegion *prev = NULL; + while (index < _numMarkedRegions) { + HeapRegion *curr = _markedRegions.at(index++); + if (curr != NULL) { + int si = curr->sort_index(); + guarantee(!curr->is_young(), "should not be young!"); + guarantee(si > -1 && si == (index-1), "sort index invariant"); + if (prev != NULL) { + guarantee(orderRegions(prev, curr) != 1, "regions should be sorted"); + } + prev = curr; + } + } + return _cache.verify(); +} +#endif + +bool +CollectionSetChooser::addRegionToCache() { + assert(!_cache.is_full(), "cache should not be full"); + + HeapRegion *hr = NULL; + while (hr == NULL && _curMarkedIndex < _numMarkedRegions) { + hr = _markedRegions.at(_curMarkedIndex++); + } + if (hr == NULL) + return false; + assert(!hr->is_young(), "should not be young!"); + assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant"); + _markedRegions.at_put(hr->sort_index(), NULL); + _cache.insert(hr); + assert(!_cache.is_empty(), "cache should not be empty"); + assert(verify(), "cache should be consistent"); + return false; +} + +void +CollectionSetChooser::fillCache() { + while (!_cache.is_full() && addRegionToCache()) { + } +} + +void +CollectionSetChooser::sortMarkedHeapRegions() { + guarantee(_cache.is_empty(), "cache should be empty"); + // First trim any unused portion of the top in the parallel case. 
+ if (_first_par_unreserved_idx > 0) { + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Truncating _markedRegions from %d to %d.\n", + _markedRegions.length(), _first_par_unreserved_idx); + } + assert(_first_par_unreserved_idx <= _markedRegions.length(), + "Or we didn't reserved enough length"); + _markedRegions.trunc_to(_first_par_unreserved_idx); + } + _markedRegions.sort(orderRegions); + assert(_numMarkedRegions <= _markedRegions.length(), "Requirement"); + assert(_numMarkedRegions == 0 + || _markedRegions.at(_numMarkedRegions-1) != NULL, + "Testing _numMarkedRegions"); + assert(_numMarkedRegions == _markedRegions.length() + || _markedRegions.at(_numMarkedRegions) == NULL, + "Testing _numMarkedRegions"); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" Sorted %d marked regions.", _numMarkedRegions); + } + for (int i = 0; i < _numMarkedRegions; i++) { + assert(_markedRegions.at(i) != NULL, "Should be true by sorting!"); + _markedRegions.at(i)->set_sort_index(i); + if (G1PrintRegionLivenessInfo > 0) { + if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:"); + if (i < G1PrintRegionLivenessInfo || + (_numMarkedRegions-i) < G1PrintRegionLivenessInfo) { + HeapRegion* hr = _markedRegions.at(i); + size_t u = hr->used(); + gclog_or_tty->print_cr(" Region %d: %d used, %d max live, %5.2f%%.", + i, u, hr->max_live_bytes(), + 100.0*(float)hr->max_live_bytes()/(float)u); + } + } + } + if (G1PolicyVerbose > 1) + printSortedHeapRegions(); + assert(verify(), "should now be sorted"); +} + +void +printHeapRegion(HeapRegion *hr) { + if (hr->isHumongous()) + gclog_or_tty->print("H: "); + if (hr->in_collection_set()) + gclog_or_tty->print("CS: "); + if (hr->popular()) + gclog_or_tty->print("pop: "); + gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) " + "[" PTR_FORMAT ", " PTR_FORMAT"] " + "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.", + hr, hr->is_young() ? "Y " : " ", + hr->is_marked()? 
"M1" : "M0", + hr->bottom(), hr->end(), + hr->used()/K, hr->garbage_bytes()/K); +} + +void +CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) { + assert(!hr->isHumongous(), + "Humongous regions shouldn't be added to the collection set"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.append(hr); + _numMarkedRegions++; + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser:: +prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) { + _first_par_unreserved_idx = 0; + size_t max_waste = ParallelGCThreads * chunkSize; + // it should be aligned with respect to chunkSize + size_t aligned_n_regions = + (n_regions + (chunkSize - 1)) / chunkSize * chunkSize; + assert( aligned_n_regions % chunkSize == 0, "should be aligned" ); + _markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL); +} + +jint +CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) { + jint res = Atomic::add(n_regions, &_first_par_unreserved_idx); + assert(_markedRegions.length() > res + n_regions - 1, + "Should already have been expanded"); + return res - n_regions; +} + +void +CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) { + assert(_markedRegions.at(index) == NULL, "precondition"); + assert(!hr->is_young(), "should not be young!"); + _markedRegions.at_put(index, hr); + hr->calc_gc_efficiency(); +} + +void +CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) { + (void)Atomic::add(inc_by, &_numMarkedRegions); +} + +void +CollectionSetChooser::clearMarkedHeapRegions(){ + for (int i = 0; i < _markedRegions.length(); i++) { + HeapRegion* r = _markedRegions.at(i); + if (r != NULL) r->set_sort_index(-1); + } + _markedRegions.clear(); + _curMarkedIndex = 0; + _numMarkedRegions = 0; + _cache.clear(); +}; + +void +CollectionSetChooser::updateAfterFullCollection() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + clearMarkedHeapRegions(); +} + +void +CollectionSetChooser::printSortedHeapRegions() { + gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage", + _numMarkedRegions); + for (int i = 0; i < _markedRegions.length(); i++) { + printHeapRegion(_markedRegions.at(i)); + } + gclog_or_tty->print_cr("Done sorted heap region print"); +} + +void CollectionSetChooser::removeRegion(HeapRegion *hr) { + int si = hr->sort_index(); + assert(si == -1 || hr->is_marked(), "Sort index not valid."); + if (si > -1) { + assert(_markedRegions.at(si) == hr, "Sort index not valid." 
); + _markedRegions.at_put(si, NULL); + } else if (si < -1) { + assert(_cache.region_in_cache(hr), "should be in the cache"); + _cache.remove(hr); + assert(hr->sort_index() == -1, "sort index invariant"); + } + hr->set_sort_index(-1); +} + +// if time_remaining < 0.0, then this method should try to return +// a region, whether it fits within the remaining time or not +HeapRegion* +CollectionSetChooser::getNextMarkedRegion(double time_remaining, + double avg_prediction) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + fillCache(); + if (_cache.is_empty()) { + assert(_curMarkedIndex == _numMarkedRegions, + "if cache is empty, list should also be empty"); + return NULL; + } + + HeapRegion *hr = _cache.get_first(); + assert(hr != NULL, "if cache not empty, first entry should be non-null"); + double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false); + + if (g1p->adaptive_young_list_length()) { + if (time_remaining - predicted_time < 0.0) { + g1h->check_if_region_is_too_expensive(predicted_time); + return NULL; + } + } else { + if (predicted_time > 2.0 * avg_prediction) { + return NULL; + } + } + + HeapRegion *hr2 = _cache.remove_first(); + assert(hr == hr2, "cache contents should not have changed"); + + return hr; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/collectionSetChooser.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,138 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// We need to sort heap regions by collection desirability. 
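Concretely, "desirability" here is HeapRegion::gc_efficiency() (roughly, reclaimable garbage per unit of predicted collection time): regions with higher efficiency sort first and NULL slots sink to the end. Below is a compact restatement of the ordering contract of orderRegions() from the .cpp above; it is illustrative only, with a placeholder struct standing in for HeapRegion.

#include <cstddef>

struct RegionSketch { double gc_efficiency; };

// Same contract as orderRegions(): negative means r1 sorts before r2.
static int order_regions_sketch(const RegionSketch* r1, const RegionSketch* r2) {
  if (r1 == NULL) return (r2 == NULL) ? 0 : 1;           // NULL entries sort last
  if (r2 == NULL) return -1;
  if (r2->gc_efficiency < r1->gc_efficiency) return -1;  // more efficient regions first
  if (r1->gc_efficiency < r2->gc_efficiency) return 1;
  return 0;
}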
+ +class CSetChooserCache { +private: + enum { + CacheLength = 16 + } PrivateConstants; + + HeapRegion* _cache[CacheLength]; + int _occupancy; // number of region in cache + int _first; // "first" region in the cache + + // adding CacheLength to deal with negative values + inline int trim_index(int index) { + return (index + CacheLength) % CacheLength; + } + + inline int get_sort_index(int index) { + return -index-2; + } + inline int get_index(int sort_index) { + return -sort_index-2; + } + +public: + CSetChooserCache(void); + + inline int occupancy(void) { return _occupancy; } + inline bool is_full() { return _occupancy == CacheLength; } + inline bool is_empty() { return _occupancy == 0; } + + void clear(void); + void insert(HeapRegion *hr); + HeapRegion *remove_first(void); + void remove (HeapRegion *hr); + inline HeapRegion *get_first(void) { + return _cache[_first]; + } + +#ifndef PRODUCT + bool verify (void); + bool region_in_cache(HeapRegion *hr) { + int sort_index = hr->sort_index(); + if (sort_index < -1) { + int index = get_index(sort_index); + guarantee(index < CacheLength, "should be within bounds"); + return _cache[index] == hr; + } else + return 0; + } +#endif // PRODUCT +}; + +class CollectionSetChooser: public CHeapObj { + + GrowableArray _markedRegions; + int _curMarkedIndex; + int _numMarkedRegions; + CSetChooserCache _cache; + + // True iff last collection pause ran of out new "age 0" regions, and + // returned an "age 1" region. + bool _unmarked_age_1_returned_as_new; + + jint _first_par_unreserved_idx; + +public: + + HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction); + + CollectionSetChooser(); + + void printSortedHeapRegions(); + + void sortMarkedHeapRegions(); + void fillCache(); + bool addRegionToCache(void); + void addMarkedHeapRegion(HeapRegion *hr); + + // Must be called before calls to getParMarkedHeapRegionChunk. + // "n_regions" is the number of regions, "chunkSize" the chunk size. + void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize); + // Returns the first index in a contiguous chunk of "n_regions" indexes + // that the calling thread has reserved. These must be set by the + // calling thread using "setMarkedHeapRegion" (to NULL if necessary). + jint getParMarkedHeapRegionChunk(jint n_regions); + // Set the marked array entry at index to hr. Careful to claim the index + // first if in parallel. + void setMarkedHeapRegion(jint index, HeapRegion* hr); + // Atomically increment the number of claimed regions by "inc_by". + void incNumMarkedHeapRegions(jint inc_by); + + void clearMarkedHeapRegions(); + + void updateAfterFullCollection(); + + // Ensure that "hr" is not a member of the marked region array or the cache + void removeRegion(HeapRegion* hr); + + bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; } + + // Returns true if the used portion of "_markedRegions" is properly + // sorted, otherwise asserts false. +#ifndef PRODUCT + bool verify(void); + bool regionProperlyOrdered(HeapRegion* r) { + int si = r->sort_index(); + return (si == -1) || + (si > -1 && _markedRegions.at(si) == r) || + (si < -1 && _cache.region_in_cache(r)); + } +#endif + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,355 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentG1Refine.cpp.incl" + +bool ConcurrentG1Refine::_enabled = false; + +ConcurrentG1Refine::ConcurrentG1Refine() : + _pya(PYA_continue), _last_pya(PYA_continue), + _last_cards_during(), _first_traversal(false), + _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), + _hot_cache(NULL), + _def_use_cache(false), _use_cache(false), + _n_periods(0), _total_cards(0), _total_travs(0) +{ + if (G1ConcRefine) { + _cg1rThread = new ConcurrentG1RefineThread(this); + assert(cg1rThread() != NULL, "Conc refine should have been created"); + assert(cg1rThread()->cg1r() == this, + "Conc refine thread should refer to this"); + } else { + _cg1rThread = NULL; + } +} + +void ConcurrentG1Refine::init() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _n_card_counts = + (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); + _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); + for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0; + ModRefBarrierSet* bs = g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + CardTableModRefBS* ctbs = (CardTableModRefBS*)bs; + _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start()); + if (G1ConcRSCountTraversals) { + _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); + for (int i = 0; i < 256; i++) { + _cur_card_count_histo[i] = 0; + _cum_card_count_histo[i] = 0; + } + } + } + if (G1ConcRSLogCacheSize > 0) { + _def_use_cache = true; + _use_cache = true; + _hot_cache_size = (1 << G1ConcRSLogCacheSize); + _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); + _n_hot = 0; + _hot_cache_idx = 0; + } +} + +ConcurrentG1Refine::~ConcurrentG1Refine() { + if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + assert(_card_counts != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned char, _card_counts); + assert(_cur_card_count_histo != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo); + assert(_cum_card_count_histo != NULL, "Logic"); + FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo); + } + if (G1ConcRSLogCacheSize > 0) { + assert(_hot_cache != NULL, "Logic"); + FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); + } +} + +bool ConcurrentG1Refine::refine() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards(); + clear_hot_cache(); // 
Any previous values in this are now invalid. + g1h->g1_rem_set()->concurrentRefinementPass(this); + _traversals++; + unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards(); + unsigned cards_during = cards_after-cards_before; + // If this is the first traversal in the current enabling + // and we did some cards, or if the number of cards found is decreasing + // sufficiently quickly, then keep going. Otherwise, sleep a while. + bool res = + (_first_traversal && cards_during > 0) + || + (!_first_traversal && cards_during * 3 < _last_cards_during * 2); + _last_cards_during = cards_during; + _first_traversal = false; + return res; +} + +void ConcurrentG1Refine::enable() { + MutexLocker x(G1ConcRefine_mon); + if (!_enabled) { + _enabled = true; + _first_traversal = true; _last_cards_during = 0; + G1ConcRefine_mon->notify_all(); + } +} + +unsigned ConcurrentG1Refine::disable() { + MutexLocker x(G1ConcRefine_mon); + if (_enabled) { + _enabled = false; + return _traversals; + } else { + return 0; + } +} + +void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() { + G1ConcRefine_mon->lock(); + while (!_enabled) { + G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag); + } + G1ConcRefine_mon->unlock(); + _traversals = 0; +}; + +void ConcurrentG1Refine::set_pya_restart() { + // If we're using the log-based RS barrier, the above will cause + // in-progress traversals of completed log buffers to quit early; we will + // also abandon all other buffers. + if (G1RSBarrierUseQueue) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.abandon_logs(); + if (_cg1rThread->do_traversal()) { + _pya = PYA_restart; + } else { + _cg1rThread->set_do_traversal(true); + // Reset the post-yield actions. + _pya = PYA_continue; + _last_pya = PYA_continue; + } + } else { + _pya = PYA_restart; + } +} + +void ConcurrentG1Refine::set_pya_cancel() { + _pya = PYA_cancel; +} + +PostYieldAction ConcurrentG1Refine::get_pya() { + if (_pya != PYA_continue) { + jint val = _pya; + while (true) { + jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val); + if (val_read == val) { + PostYieldAction res = (PostYieldAction)val; + assert(res != PYA_continue, "Only the refine thread should reset."); + _last_pya = res; + return res; + } else { + val = val_read; + } + } + } + // QQQ WELL WHAT DO WE RETURN HERE??? + // make up something! + return PYA_continue; +} + +PostYieldAction ConcurrentG1Refine::get_last_pya() { + PostYieldAction res = _last_pya; + _last_pya = PYA_continue; + return res; +} + +bool ConcurrentG1Refine::do_traversal() { + return _cg1rThread->do_traversal(); +} + +int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { + size_t card_num = (card_ptr - _ct_bot); + guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds"); + unsigned char cnt = _card_counts[card_num]; + if (cnt < 255) _card_counts[card_num]++; + return cnt; + _total_travs++; +} + +jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { + int count = add_card_count(card_ptr); + // Count previously unvisited cards. + if (count == 0) _total_cards++; + // We'll assume a traversal unless we store it in the cache. + if (count < G1ConcRSHotCardLimit) { + _total_travs++; + return card_ptr; + } + // Otherwise, it's hot. + jbyte* res = NULL; + MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); + if (_n_hot == _hot_cache_size) { + _total_travs++; + res = _hot_cache[_hot_cache_idx]; + _n_hot--; + } + // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. 
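For orientation at this point in cache_insert(): cards still below G1ConcRSHotCardLimit are handed straight back for immediate refinement, while hotter cards are parked in a small ring buffer and only resurface when a newer hot card evicts them (the real code holds HotCardCache_lock around this section). The following standalone sketch of that contract is illustrative only; the template and names are invented stand-ins for the jbyte* card array.

#include <cstddef>

// Fixed-size hot-card cache with overwrite-the-oldest eviction.
template <typename Card, int N>
class HotCacheSketch {
  Card* _entries[N];
  int   _idx;    // next slot to (over)write
  int   _n_hot;  // live entries, at most N
public:
  HotCacheSketch() : _idx(0), _n_hot(0) {
    for (int i = 0; i < N; ++i) _entries[i] = NULL;
  }

  // Returns the card the caller should refine right now: the input itself
  // while the card is still "cold", the displaced entry when the buffer was
  // full, or NULL when the hot card was simply parked.
  Card* insert(Card* card, bool is_hot) {
    if (!is_hot) return card;        // cold card: refine immediately
    Card* evicted = NULL;
    if (_n_hot == N) {
      evicted = _entries[_idx];      // remember the oldest occupant
      --_n_hot;
    }
    _entries[_idx] = card;           // park the hot card
    _idx = (_idx + 1) % N;
    ++_n_hot;
    return evicted;                  // NULL unless something was displaced
  }
};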
+ _hot_cache[_hot_cache_idx] = card_ptr; + _hot_cache_idx++; + if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; + _n_hot++; + return res; +} + + +void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { + assert(!use_cache(), "cache should be disabled"); + int start_ind = _hot_cache_idx-1; + for (int i = 0; i < _n_hot; i++) { + int ind = start_ind - i; + if (ind < 0) ind = ind + _hot_cache_size; + jbyte* entry = _hot_cache[ind]; + if (entry != NULL) { + g1rs->concurrentRefineOneCard(entry, worker_i); + } + } + _n_hot = 0; + _hot_cache_idx = 0; +} + +void ConcurrentG1Refine::clear_and_record_card_counts() { + if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return; + _n_periods++; + if (G1ConcRSCountTraversals) { + for (size_t i = 0; i < _n_card_counts; i++) { + unsigned char bucket = _card_counts[i]; + _cur_card_count_histo[bucket]++; + _card_counts[i] = 0; + } + gclog_or_tty->print_cr("Card counts:"); + for (int i = 0; i < 256; i++) { + if (_cur_card_count_histo[i] > 0) { + gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]); + _cum_card_count_histo[i] += _cur_card_count_histo[i]; + _cur_card_count_histo[i] = 0; + } + } + } else { + assert(G1ConcRSLogCacheSize > 0, "Logic"); + Copy::fill_to_words((HeapWord*)(&_card_counts[0]), + _n_card_counts / HeapWordSize); + } +} + +void +ConcurrentG1Refine:: +print_card_count_histo_range(unsigned* histo, int from, int to, + float& cum_card_pct, + float& cum_travs_pct) { + unsigned cards = 0; + unsigned travs = 0; + guarantee(to <= 256, "Precondition"); + for (int i = from; i < to-1; i++) { + cards += histo[i]; + travs += histo[i] * i; + } + if (to == 256) { + unsigned histo_card_sum = 0; + unsigned histo_trav_sum = 0; + for (int i = 1; i < 255; i++) { + histo_trav_sum += histo[i] * i; + } + cards += histo[255]; + // correct traversals for the last one. + unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum); + travs += travs_255; + + } else { + cards += histo[to-1]; + travs += histo[to-1] * (to-1); + } + float fperiods = (float)_n_periods; + float f_tot_cards = (float)_total_cards/fperiods; + float f_tot_travs = (float)_total_travs/fperiods; + if (cards > 0) { + float fcards = (float)cards/fperiods; + float ftravs = (float)travs/fperiods; + if (to == 256) { + gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs); + } else { + gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs); + } + float pct_cards = fcards*100.0/f_tot_cards; + cum_card_pct += pct_cards; + float pct_travs = ftravs*100.0/f_tot_travs; + cum_travs_pct += pct_travs; + gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f", + pct_cards, cum_card_pct, + pct_travs, cum_travs_pct); + } +} + +void ConcurrentG1Refine::print_final_card_counts() { + if (!G1ConcRSCountTraversals) return; + + gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.", + _total_travs, _total_cards); + float fperiods = (float)_n_periods; + gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, " + "per collection.", (float)_total_travs/fperiods, + (float)_total_cards/fperiods); + gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct " + "dirty card.\n", + _total_cards > 0 ? 
+ (float)_total_travs/(float)_total_cards : 0.0); + + + gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s", + "range", "# cards", "# travs", "% cards", "(cum)", + "% travs", "(cum)"); + gclog_or_tty->print_cr("------------------------------------------------------------" + "-------------"); + float cum_cards_pct = 0.0; + float cum_travs_pct = 0.0; + for (int i = 1; i < 10; i++) { + print_card_count_histo_range(_cum_card_count_histo, i, i+1, + cum_cards_pct, cum_travs_pct); + } + for (int i = 10; i < 100; i += 10) { + print_card_count_histo_range(_cum_card_count_histo, i, i+10, + cum_cards_pct, cum_travs_pct); + } + print_card_count_histo_range(_cum_card_count_histo, 100, 150, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 200, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 150, 255, + cum_cards_pct, cum_travs_pct); + print_card_count_histo_range(_cum_card_count_histo, 255, 256, + cum_cards_pct, cum_travs_pct); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,132 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Forward decl +class ConcurrentG1RefineThread; +class G1RemSet; + +// What to do after a yield: +enum PostYieldAction { + PYA_continue, // Continue the traversal + PYA_restart, // Restart + PYA_cancel // It's been completed by somebody else: cancel. +}; + +class ConcurrentG1Refine { + ConcurrentG1RefineThread* _cg1rThread; + + volatile jint _pya; + PostYieldAction _last_pya; + + static bool _enabled; // Protected by G1ConcRefine_mon. + unsigned _traversals; + + // Number of cards processed during last refinement traversal. + unsigned _first_traversal; + unsigned _last_cards_during; + + // The cache for card refinement. + bool _use_cache; + bool _def_use_cache; + size_t _n_periods; + size_t _total_cards; + size_t _total_travs; + + unsigned char* _card_counts; + unsigned _n_card_counts; + const jbyte* _ct_bot; + unsigned* _cur_card_count_histo; + unsigned* _cum_card_count_histo; + jbyte** _hot_cache; + int _hot_cache_size; + int _n_hot; + int _hot_cache_idx; + + // Returns the count of this card after incrementing it. 
+ int add_card_count(jbyte* card_ptr); + + void print_card_count_histo_range(unsigned* histo, int from, int to, + float& cum_card_pct, + float& cum_travs_pct); + public: + ConcurrentG1Refine(); + ~ConcurrentG1Refine(); + + void init(); // Accomplish some initialization that has to wait. + + // Enabled Conc refinement, waking up thread if necessary. + void enable(); + + // Returns the number of traversals performed since this refiner was enabled. + unsigned disable(); + + // Requires G1ConcRefine_mon to be held. + bool enabled() { return _enabled; } + + // Returns only when G1 concurrent refinement has been enabled. + void wait_for_ConcurrentG1Refine_enabled(); + + // Do one concurrent refinement pass over the card table. Returns "true" + // if heuristics determine that another pass should be done immediately. + bool refine(); + + // Indicate that an in-progress refinement pass should start over. + void set_pya_restart(); + // Indicate that an in-progress refinement pass should quit. + void set_pya_cancel(); + + // Get the appropriate post-yield action. Also sets last_pya. + PostYieldAction get_pya(); + + // The last PYA read by "get_pya". + PostYieldAction get_last_pya(); + + bool do_traversal(); + + ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; } + + // If this is the first entry for the slot, writes into the cache and + // returns NULL. If it causes an eviction, returns the evicted pointer. + // Otherwise, its a cache hit, and returns NULL. + jbyte* cache_insert(jbyte* card_ptr); + + // Process the cached entries. + void clean_up_cache(int worker_i, G1RemSet* g1rs); + + // Discard entries in the hot cache. + void clear_hot_cache() { + _hot_cache_idx = 0; _n_hot = 0; + } + + bool hot_cache_is_empty() { return _n_hot == 0; } + + bool use_cache() { return _use_cache; } + void set_use_cache(bool b) { + if (b) _use_cache = _def_use_cache; + else _use_cache = false; + } + + void clear_and_record_card_counts(); + void print_final_card_counts(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,246 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentG1RefineThread.cpp.incl" + +// ======= Concurrent Mark Thread ======== + +// The CM thread is created when the G1 garbage collector is used + +ConcurrentG1RefineThread:: +ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : + ConcurrentGCThread(), + _cg1r(cg1r), + _started(false), + _in_progress(false), + _do_traversal(false), + _vtime_accum(0.0), + _co_tracker(G1CRGroup), + _interval_ms(5.0) +{ + create_and_start(); +} + +const long timeout = 200; // ms. + +void ConcurrentG1RefineThread::traversalBasedRefinement() { + _cg1r->wait_for_ConcurrentG1Refine_enabled(); + MutexLocker x(G1ConcRefine_mon); + while (_cg1r->enabled()) { + MutexUnlocker ux(G1ConcRefine_mon); + ResourceMark rm; + HandleMark hm; + + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass"); + _sts.join(); + bool no_sleep = _cg1r->refine(); + _sts.leave(); + if (!no_sleep) { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + // We do this only for the timeout; we don't expect this to be signalled. + CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout); + } + } +} + +void ConcurrentG1RefineThread::queueBasedRefinement() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + // Wait for completed log buffers to exist. + { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + while (!_do_traversal && !dcqs.process_completed_buffers() && + !_should_terminate) { + DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); + } + } + + if (_should_terminate) { + return; + } + + // Now we take them off (this doesn't hold locks while it applies + // closures.) (If we did a full collection, then we'll do a full + // traversal. + _sts.join(); + if (_do_traversal) { + (void)_cg1r->refine(); + switch (_cg1r->get_last_pya()) { + case PYA_cancel: case PYA_continue: + // Continue was caught and handled inside "refine". If it's still + // "continue" when we get here, we're done. + _do_traversal = false; + break; + case PYA_restart: + assert(_do_traversal, "Because of Full GC."); + break; + } + } else { + int n_logs = 0; + int lower_limit = 0; + double start_vtime_sec; // only used when G1SmoothConcRefine is on + int prev_buffer_num; // only used when G1SmoothConcRefine is on + + if (G1SmoothConcRefine) { + lower_limit = 0; + start_vtime_sec = os::elapsedVTime(); + prev_buffer_num = (int) dcqs.completed_buffers_num(); + } else { + lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. + } + while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) { + double end_vtime_sec; + double elapsed_vtime_sec; + int elapsed_vtime_ms; + int curr_buffer_num; + + if (G1SmoothConcRefine) { + end_vtime_sec = os::elapsedVTime(); + elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); + curr_buffer_num = (int) dcqs.completed_buffers_num(); + + if (curr_buffer_num > prev_buffer_num || + curr_buffer_num > DCQBarrierProcessCompletedThreshold) { + decreaseInterval(elapsed_vtime_ms); + } else if (curr_buffer_num < prev_buffer_num) { + increaseInterval(elapsed_vtime_ms); + } + } + + sample_young_list_rs_lengths(); + _co_tracker.update(false); + + if (G1SmoothConcRefine) { + start_vtime_sec = os::elapsedVTime(); + prev_buffer_num = curr_buffer_num; + + _sts.leave(); + os::sleep(Thread::current(), (jlong) _interval_ms, false); + _sts.join(); + } + + n_logs++; + } + // Make sure we harvest the PYA, if any. 
+ (void)_cg1r->get_pya(); + } + _sts.leave(); +} + +void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + if (g1p->adaptive_young_list_length()) { + int regions_visited = 0; + + g1h->young_list_rs_length_sampling_init(); + while (g1h->young_list_rs_length_sampling_more()) { + g1h->young_list_rs_length_sampling_next(); + ++regions_visited; + + // we try to yield every time we visit 10 regions + if (regions_visited == 10) { + if (_sts.should_yield()) { + _sts.yield("G1 refine"); + // we just abandon the iteration + break; + } + regions_visited = 0; + } + } + + g1p->check_prediction_validity(); + } +} + +void ConcurrentG1RefineThread::run() { + initialize_in_thread(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); + + _co_tracker.enable(); + _co_tracker.start(); + + while (!_should_terminate) { + // wait until started is set. + if (G1RSBarrierUseQueue) { + queueBasedRefinement(); + } else { + traversalBasedRefinement(); + } + _sts.join(); + _co_tracker.update(); + _sts.leave(); + if (os::supports_vtime()) { + _vtime_accum = (os::elapsedVTime() - _vtime_start); + } else { + _vtime_accum = 0.0; + } + } + _sts.join(); + _co_tracker.update(true); + _sts.leave(); + assert(_should_terminate, "just checking"); + + terminate(); +} + + +void ConcurrentG1RefineThread::yield() { + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield"); + _sts.yield("G1 refine"); + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end"); +} + +void ConcurrentG1RefineThread::stop() { + // it is ok to take late safepoints here, if needed + { + MutexLockerEx mu(Terminator_lock); + _should_terminate = true; + } + + { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + DirtyCardQ_CBL_mon->notify_all(); + } + + { + MutexLockerEx mu(Terminator_lock); + while (!_has_terminated) { + Terminator_lock->wait(); + } + } + if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop"); +} + +void ConcurrentG1RefineThread::print() { + gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" "); + Thread::print(); + gclog_or_tty->cr(); +} + +void ConcurrentG1RefineThread::set_do_traversal(bool b) { + _do_traversal = b; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,104 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Forward Decl. +class ConcurrentG1Refine; + +// The G1 Concurrent Refinement Thread (could be several in the future). + +class ConcurrentG1RefineThread: public ConcurrentGCThread { + friend class VMStructs; + friend class G1CollectedHeap; + + double _vtime_start; // Initial virtual time. + double _vtime_accum; // Initial virtual time. + + public: + virtual void run(); + + private: + ConcurrentG1Refine* _cg1r; + bool _started; + bool _in_progress; + volatile bool _restart; + + COTracker _co_tracker; + double _interval_ms; + + bool _do_traversal; + + void decreaseInterval(int processing_time_ms) { + double min_interval_ms = (double) processing_time_ms; + _interval_ms = 0.8 * _interval_ms; + if (_interval_ms < min_interval_ms) + _interval_ms = min_interval_ms; + } + void increaseInterval(int processing_time_ms) { + double max_interval_ms = 9.0 * (double) processing_time_ms; + _interval_ms = 1.1 * _interval_ms; + if (max_interval_ms > 0 && _interval_ms > max_interval_ms) + _interval_ms = max_interval_ms; + } + + void sleepBeforeNextCycle(); + + void traversalBasedRefinement(); + + void queueBasedRefinement(); + + // For use by G1CollectedHeap, which is a friend. + static SuspendibleThreadSet* sts() { return &_sts; } + + public: + // Constructor + ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r); + + // Printing + void print(); + + // Total virtual time so far. + double vtime_accum() { return _vtime_accum; } + + ConcurrentG1Refine* cg1r() { return _cg1r; } + + + void set_started() { _started = true; } + void clear_started() { _started = false; } + bool started() { return _started; } + + void set_in_progress() { _in_progress = true; } + void clear_in_progress() { _in_progress = false; } + bool in_progress() { return _in_progress; } + + void set_do_traversal(bool b); + bool do_traversal() { return _do_traversal; } + + void sample_young_list_rs_lengths(); + + // Yield for GC + void yield(); + + // shutdown + static void stop(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,3957 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentMark.cpp.incl" + +// +// CMS Bit Map Wrapper + +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): + _bm((uintptr_t*)NULL,0), + _shifter(shifter) { + _bmStartWord = (HeapWord*)(rs.base()); + _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes + ReservedSpace brs(ReservedSpace::allocation_align_size_up( + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); + + guarantee(brs.is_reserved(), "couldn't allocate CMS bit map"); + // For now we'll just commit all of the bit map up fromt. + // Later on we'll try to be more parsimonious with swap. + guarantee(_virtual_space.initialize(brs, brs.size()), + "couldn't reseve backing store for CMS bit map"); + assert(_virtual_space.committed_size() == brs.size(), + "didn't reserve backing store for all of CMS bit map?"); + _bm.set_map((uintptr_t*)_virtual_space.low()); + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= + _bmWordSize, "inconsistency in bit map sizing"); + _bm.set_size(_bmWordSize >> _shifter); +} + +HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + // First we must round addr *up* to a possible object boundary. + addr = (HeapWord*)align_size_up((intptr_t)addr, + HeapWordSize << _shifter); + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, + HeapWord* limit) const { + size_t addrOffset = heapWordToOffset(addr); + if (limit == NULL) limit = _bmStartWord + _bmWordSize; + size_t limitOffset = heapWordToOffset(limit); + size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); + HeapWord* nextAddr = offsetToHeapWord(nextOffset); + assert(nextAddr >= addr, "get_next_one postcondition"); + assert(nextAddr == limit || !isMarked(nextAddr), + "get_next_one postcondition"); + return nextAddr; +} + +int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { + assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); + return (int) (diff >> _shifter); +} + +bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { + HeapWord* left = MAX2(_bmStartWord, mr.start()); + HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); + if (right > left) { + // Right-open interval [leftOffset, rightOffset). 
+ return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); + } else { + return true; + } +} + +void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, + size_t from_start_index, + HeapWord* to_start_word, + size_t word_num) { + _bm.mostly_disjoint_range_union(from_bitmap, + from_start_index, + heapWordToOffset(to_start_word), + word_num); +} + +#ifndef PRODUCT +bool CMBitMapRO::covers(ReservedSpace rs) const { + // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); + assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize, + "size inconsistency"); + return _bmStartWord == (HeapWord*)(rs.base()) && + _bmWordSize == rs.size()>>LogHeapWordSize; +} +#endif + +void CMBitMap::clearAll() { + _bm.clear(); + return; +} + +void CMBitMap::markRange(MemRegion mr) { + mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); + assert(!mr.is_empty(), "unexpected empty region"); + assert((offsetToHeapWord(heapWordToOffset(mr.end())) == + ((HeapWord *) mr.end())), + "markRange memory region end is not card aligned"); + // convert address range into offset range + _bm.at_put_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end()), true); +} + +void CMBitMap::clearRange(MemRegion mr) { + mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); + assert(!mr.is_empty(), "unexpected empty region"); + // convert address range into offset range + _bm.at_put_range(heapWordToOffset(mr.start()), + heapWordToOffset(mr.end()), false); +} + +MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, + HeapWord* end_addr) { + HeapWord* start = getNextMarkedWordAddress(addr); + start = MIN2(start, end_addr); + HeapWord* end = getNextUnmarkedWordAddress(start); + end = MIN2(end, end_addr); + assert(start <= end, "Consistency check"); + MemRegion mr(start, end); + if (!mr.is_empty()) { + clearRange(mr); + } + return mr; +} + +CMMarkStack::CMMarkStack(ConcurrentMark* cm) : + _base(NULL), _cm(cm) +#ifdef ASSERT + , _drain_in_progress(false) + , _drain_in_progress_yields(false) +#endif +{} + +void CMMarkStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(oop, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; + _oops_do_bound = -1; + NOT_PRODUCT(_max_depth = 0); +} + +CMMarkStack::~CMMarkStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMMarkStack::par_push(oop ptr) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = ptr; + // Note that we don't maintain this atomically. We could, but it + // doesn't seem necessary. + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. + } +} + +void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + for (int i = 0; i < n; i++) { + int ind = index + i; + assert(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); + return; + } + // Otherwise, we need to try again. 
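The retry loop above (and the one in par_push) follows a claim-then-publish pattern: a CAS on _index reserves a slot, and only the winning thread writes into the slot it claimed. The sketch below restates that pattern with std::atomic in place of the VM's Atomic::cmpxchg; it is illustrative only and all names are local to the sketch.

#include <atomic>

template <typename T, int Capacity>
class ParStackSketch {
  T                _base[Capacity];
  std::atomic<int> _index;     // number of claimed slots
  bool             _overflow;
public:
  ParStackSketch() : _index(0), _overflow(false) {}

  void par_push(T value) {
    while (true) {
      int index = _index.load();
      if (index >= Capacity) {           // full: record overflow and give up
        _overflow = true;
        return;
      }
      // Claim slot 'index' by advancing _index with a CAS; only the winner
      // writes into the slot it claimed.
      if (_index.compare_exchange_weak(index, index + 1)) {
        _base[index] = value;
        return;
      }
      // CAS failed: another thread claimed the slot first, so retry.
    }
  }
};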
+ } +} + + +void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint start = _index; + jint next_index = start + n; + if (next_index > _capacity) { + _overflow = true; + return; + } + // Otherwise. + _index = next_index; + for (int i = 0; i < n; i++) { + int ind = start + i; + guarantee(ind < _capacity, "By overflow test above."); + _base[ind] = ptr_arr[i]; + } +} + + +bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + jint index = _index; + if (index == 0) { + *n = 0; + return false; + } else { + int k = MIN2(max, index); + jint new_ind = index - k; + for (int j = 0; j < k; j++) { + ptr_arr[j] = _base[new_ind + j]; + } + _index = new_ind; + *n = k; + return true; + } +} + + +CMRegionStack::CMRegionStack() : _base(NULL) {} + +void CMRegionStack::allocate(size_t size) { + _base = NEW_C_HEAP_ARRAY(MemRegion, size); + if (_base == NULL) + vm_exit_during_initialization("Failed to allocate " + "CM region mark stack"); + _index = 0; + // QQQQ cast ... + _capacity = (jint) size; +} + +CMRegionStack::~CMRegionStack() { + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); +} + +void CMRegionStack::push(MemRegion mr) { + assert(mr.word_size() > 0, "Precondition"); + while (true) { + if (isFull()) { + _overflow = true; + return; + } + // Otherwise... + jint index = _index; + jint next_index = index+1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + _base[index] = mr; + return; + } + // Otherwise, we need to try again. + } +} + +MemRegion CMRegionStack::pop() { + while (true) { + // Otherwise... + jint index = _index; + + if (index == 0) { + return MemRegion(); + } + jint next_index = index-1; + jint res = Atomic::cmpxchg(next_index, &_index, index); + if (res == index) { + MemRegion mr = _base[next_index]; + if (mr.start() != NULL) { + tmp_guarantee_CM( mr.end() != NULL, "invariant" ); + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); + return mr; + } else { + // that entry was invalidated... let's skip it + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); + } + } + // Otherwise, we need to try again. + } +} + +bool CMRegionStack::invalidate_entries_into_cset() { + bool result = false; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + for (int i = 0; i < _oops_do_bound; ++i) { + MemRegion mr = _base[i]; + if (mr.start() != NULL) { + tmp_guarantee_CM( mr.end() != NULL, "invariant"); + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); + HeapRegion* hr = g1h->heap_region_containing(mr.start()); + tmp_guarantee_CM( hr != NULL, "invariant" ); + if (hr->in_collection_set()) { + // The region points into the collection set + _base[i] = MemRegion(); + result = true; + } + } else { + // that entry was invalidated... 
let's skip it + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); + } + } + return result; +} + +template +bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { + assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after + || SafepointSynchronize::is_at_safepoint(), + "Drain recursion must be yield-safe."); + bool res = true; + debug_only(_drain_in_progress = true); + debug_only(_drain_in_progress_yields = yield_after); + while (!isEmpty()) { + oop newOop = pop(); + assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); + assert(newOop->is_oop(), "Expected an oop"); + assert(bm == NULL || bm->isMarked((HeapWord*)newOop), + "only grey objects on this stack"); + // iterate over the oops in this oop, marking and pushing + // the ones in CMS generation. + newOop->oop_iterate(cl); + if (yield_after && _cm->do_yield_check()) { + res = false; break; + } + } + debug_only(_drain_in_progress = false); + return res; +} + +void CMMarkStack::oops_do(OopClosure* f) { + if (_index == 0) return; + assert(_oops_do_bound != -1 && _oops_do_bound <= _index, + "Bound must be set."); + for (int i = 0; i < _oops_do_bound; i++) { + f->do_oop(&_base[i]); + } + _oops_do_bound = -1; +} + +bool ConcurrentMark::not_yet_marked(oop obj) const { + return (_g1h->is_obj_ill(obj) + || (_g1h->is_in_permanent(obj) + && !nextMarkBitMap()->isMarked((HeapWord*)obj))); +} + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + +ConcurrentMark::ConcurrentMark(ReservedSpace rs, + int max_regions) : + _markBitMap1(rs, MinObjAlignment - 1), + _markBitMap2(rs, MinObjAlignment - 1), + + _parallel_marking_threads(0), + _sleep_factor(0.0), + _marking_task_overhead(1.0), + _cleanup_sleep_factor(0.0), + _cleanup_task_overhead(1.0), + _region_bm(max_regions, false /* in_resource_area*/), + _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> + CardTableModRefBS::card_shift, + false /* in_resource_area*/), + _prevMarkBitMap(&_markBitMap1), + _nextMarkBitMap(&_markBitMap2), + _at_least_one_mark_complete(false), + + _markStack(this), + _regionStack(), + // _finger set in set_non_marking_state + + _max_task_num(MAX2(ParallelGCThreads, (size_t)1)), + // _active_tasks set in set_non_marking_state + // _tasks set inside the constructor + _task_queues(new CMTaskQueueSet((int) _max_task_num)), + _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), + + _has_overflown(false), + _concurrent(false), + + // _verbose_level set below + + _init_times(), + _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), + _cleanup_times(), + _total_counting_time(0.0), + _total_rs_scrub_time(0.0), + + _parallel_workers(NULL), + _cleanup_co_tracker(G1CLGroup) +{ + CMVerboseLevel verbose_level = + (CMVerboseLevel) G1MarkingVerboseLevel; + if (verbose_level < no_verbose) + verbose_level = no_verbose; + if (verbose_level > high_verbose) + verbose_level = high_verbose; + _verbose_level = verbose_level; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " + "heap end = "PTR_FORMAT, _heap_start, _heap_end); + + _markStack.allocate(G1CMStackSize); + _regionStack.allocate(G1CMRegionStackSize); + + // Create & start a ConcurrentMark thread. 
+ if (G1ConcMark) { + _cmThread = new ConcurrentMarkThread(this); + assert(cmThread() != NULL, "CM Thread should have been created"); + assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); + } else { + _cmThread = NULL; + } + _g1h = G1CollectedHeap::heap(); + assert(CGC_lock != NULL, "Where's the CGC_lock?"); + assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); + assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); + + SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); + satb_qs.set_buffer_size(G1SATBLogBufferSize); + + int size = (int) MAX2(ParallelGCThreads, (size_t)1); + _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size); + for (int i = 0 ; i < size; i++) { + _par_cleanup_thread_state[i] = new ParCleanupThreadState; + } + + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); + + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail + _active_tasks = _max_task_num; + for (int i = 0; i < (int) _max_task_num; ++i) { + CMTaskQueue* task_queue = new CMTaskQueue(); + task_queue->initialize(); + _task_queues->register_queue(i, task_queue); + + _tasks[i] = new CMTask(i, this, task_queue, _task_queues); + _accum_task_vtime[i] = 0.0; + } + + if (ParallelMarkingThreads > ParallelGCThreads) { + vm_exit_during_initialization("Can't have more ParallelMarkingThreads " + "than ParallelGCThreads."); + } + if (ParallelGCThreads == 0) { + // if we are not running with any parallel GC threads we will not + // spawn any marking threads either + _parallel_marking_threads = 0; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else { + if (ParallelMarkingThreads > 0) { + // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc + // if both are set + + _parallel_marking_threads = ParallelMarkingThreads; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } else if (G1MarkingOverheadPerc > 0) { + // we will calculate the number of parallel marking threads + // based on a target overhead with respect to the soft real-time + // goal + + double marking_overhead = (double) G1MarkingOverheadPerc / 100.0; + double overall_cm_overhead = + (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS; + double cpu_ratio = 1.0 / (double) os::processor_count(); + double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); + double marking_task_overhead = + overall_cm_overhead / marking_thread_num * + (double) os::processor_count(); + double sleep_factor = + (1.0 - marking_task_overhead) / marking_task_overhead; + + _parallel_marking_threads = (size_t) marking_thread_num; + _sleep_factor = sleep_factor; + _marking_task_overhead = marking_task_overhead; + } else { + _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; + } + + if (parallel_marking_threads() > 1) + _cleanup_task_overhead = 1.0; + else + _cleanup_task_overhead = marking_task_overhead(); + _cleanup_sleep_factor = + (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); + +#if 0 + gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); + gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); + gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); + gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); + gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); +#endif + + guarantee( 
parallel_marking_threads() > 0, "peace of mind" ); + _parallel_workers = new WorkGang("Parallel Marking Threads", + (int) parallel_marking_threads(), false, true); + if (_parallel_workers == NULL) + vm_exit_during_initialization("Failed necessary allocation."); + } + + // so that the call below can read a sensible value + _heap_start = (HeapWord*) rs.base(); + set_non_marking_state(); +} + +void ConcurrentMark::update_g1_committed(bool force) { + // If concurrent marking is not in progress, then we do not need to + // update _heap_end. This has a subtle and important + // side-effect. Imagine that two evacuation pauses happen between + // marking completion and remark. The first one can grow the + // heap (hence now the finger is below the heap end). Then, the + // second one could unnecessarily push regions on the region + // stack. This causes the invariant that the region stack is empty + // at the beginning of remark to be false. By ensuring that we do + // not observe heap expansions after marking is complete, then we do + // not have this problem. + if (!concurrent_marking_in_progress() && !force) + return; + + MemRegion committed = _g1h->g1_committed(); + tmp_guarantee_CM( committed.start() == _heap_start, + "start shouldn't change" ); + HeapWord* new_end = committed.end(); + if (new_end > _heap_end) { + // The heap has been expanded. + + _heap_end = new_end; + } + // Notice that the heap can also shrink. However, this only happens + // during a Full GC (at least currently) and the entire marking + // phase will bail out and the task will not be restarted. So, let's + // do nothing. +} + +void ConcurrentMark::reset() { + // Starting values for these two. This should be called in a STW + // phase. CM will be notified of any future g1_committed expansions + // will be at the end of evacuation pauses, when tasks are + // inactive. + MemRegion committed = _g1h->g1_committed(); + _heap_start = committed.start(); + _heap_end = committed.end(); + + guarantee( _heap_start != NULL && + _heap_end != NULL && + _heap_start < _heap_end, "heap bounds should look ok" ); + + // reset all the marking data structures and any necessary flags + clear_marking_state(); + + if (verbose_low()) + gclog_or_tty->print_cr("[global] resetting"); + + // We do reset all of them, since different phases will use + // different number of active threads. So, it's easiest to have all + // of them ready. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->reset(_nextMarkBitMap); + + // we need this to make sure that the flag is on during the evac + // pause with initial mark piggy-backed + set_concurrent_marking_in_progress(); +} + +void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { + guarantee( active_tasks <= _max_task_num, "we should not have more" ); + + _active_tasks = active_tasks; + // Need to update the three data structures below according to the + // number of active threads for this phase. + _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); + _first_overflow_barrier_sync.set_n_workers((int) active_tasks); + _second_overflow_barrier_sync.set_n_workers((int) active_tasks); + + _concurrent = concurrent; + // We propagate this to all tasks, not just the active ones. + for (int i = 0; i < (int) _max_task_num; ++i) + _tasks[i]->set_concurrent(concurrent); + + if (concurrent) { + set_concurrent_marking_in_progress(); + } else { + // We currently assume that the concurrent flag has been set to + // false before we start remark. 
At this point we should also be + // in a STW phase. + guarantee( !concurrent_marking_in_progress(), "invariant" ); + guarantee( _finger == _heap_end, "only way to get here" ); + update_g1_committed(true); + } +} + +void ConcurrentMark::set_non_marking_state() { + // We set the global marking state to some default values when we're + // not doing marking. + clear_marking_state(); + _active_tasks = 0; + clear_concurrent_marking_in_progress(); +} + +ConcurrentMark::~ConcurrentMark() { + int size = (int) MAX2(ParallelGCThreads, (size_t)1); + for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i]; + FREE_C_HEAP_ARRAY(ParCleanupThreadState*, + _par_cleanup_thread_state); + + for (int i = 0; i < (int) _max_task_num; ++i) { + delete _task_queues->queue(i); + delete _tasks[i]; + } + delete _task_queues; + FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); +} + +// This closure is used to mark refs into the g1 generation +// from external roots in the CMS bit map. +// Called at the first checkpoint. +// + +#define PRINT_REACHABLE_AT_INITIAL_MARK 0 +#if PRINT_REACHABLE_AT_INITIAL_MARK +static FILE* reachable_file = NULL; + +class PrintReachableClosure: public OopsInGenClosure { + CMBitMap* _bm; + int _level; +public: + PrintReachableClosure(CMBitMap* bm) : + _bm(bm), _level(0) { + guarantee(reachable_file != NULL, "pre-condition"); + } + void do_oop(oop* p) { + oop obj = *p; + HeapWord* obj_addr = (HeapWord*)obj; + if (obj == NULL) return; + fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n", + _level, p, (void*) obj, _bm->isMarked(obj_addr)); + if (!_bm->isMarked(obj_addr)) { + _bm->mark(obj_addr); + _level++; + obj->oop_iterate(this); + _level--; + } + } +}; +#endif // PRINT_REACHABLE_AT_INITIAL_MARK + +#define SEND_HEAP_DUMP_TO_FILE 0 +#if SEND_HEAP_DUMP_TO_FILE +static FILE* heap_dump_file = NULL; +#endif // SEND_HEAP_DUMP_TO_FILE + +void ConcurrentMark::clearNextBitmap() { + guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition."); + + // clear the mark bitmap (no grey objects to start with). + // We need to do this in chunks and offer to yield in between + // each chunk. + HeapWord* start = _nextMarkBitMap->startWord(); + HeapWord* end = _nextMarkBitMap->endWord(); + HeapWord* cur = start; + size_t chunkSize = M; + while (cur < end) { + HeapWord* next = cur + chunkSize; + if (next > end) + next = end; + MemRegion mr(cur,next); + _nextMarkBitMap->clearRange(mr); + cur = next; + do_yield_check(); + } +} + +class NoteStartOfMarkHRClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + r->note_start_of_marking(true); + } + return false; + } +}; + +void ConcurrentMark::checkpointRootsInitialPre() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + G1CollectorPolicy* g1p = g1h->g1_policy(); + + _has_aborted = false; + + // Find all the reachable objects... +#if PRINT_REACHABLE_AT_INITIAL_MARK + guarantee(reachable_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id()); + reachable_file = fopen(fn_buf, "w"); + // clear the mark bitmap (no grey objects to start with) + _nextMarkBitMap->clearAll(); + PrintReachableClosure prcl(_nextMarkBitMap); + g1h->process_strong_roots( + false, // fake perm gen collection + SharedHeap::SO_AllClasses, + &prcl, // Regular roots + &prcl // Perm Gen Roots + ); + // The root iteration above "consumed" dirty cards in the perm gen. + // Therefore, as a shortcut, we dirty all such cards. 
+ g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false); + fclose(reachable_file); + reachable_file = NULL; + // clear the mark bitmap again. + _nextMarkBitMap->clearAll(); + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + COMPILER2_PRESENT(DerivedPointerTable::clear()); +#endif // PRINT_REACHABLE_AT_INITIAL_MARK + + // Initialise marking structures. This has to be done in a STW phase. + reset(); +} + +class CMMarkRootsClosure: public OopsInGenClosure { +private: + ConcurrentMark* _cm; + G1CollectedHeap* _g1h; + bool _do_barrier; + +public: + CMMarkRootsClosure(ConcurrentMark* cm, + G1CollectedHeap* g1h, + bool do_barrier) : _cm(cm), _g1h(g1h), + _do_barrier(do_barrier) { } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + virtual void do_oop(oop* p) { + oop thisOop = *p; + if (thisOop != NULL) { + assert(thisOop->is_oop() || thisOop->mark() == NULL, + "expected an oop, possibly with mark word displaced"); + HeapWord* addr = (HeapWord*)thisOop; + if (_g1h->is_in_g1_reserved(addr)) { + _cm->grayRoot(thisOop); + } + } + if (_do_barrier) { + assert(!_g1h->is_in_g1_reserved(p), + "Should be called on external roots"); + do_barrier(p); + } + } +}; + +void ConcurrentMark::checkpointRootsInitialPost() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // For each region note start of marking. + NoteStartOfMarkHRClosure startcl; + g1h->heap_region_iterate(&startcl); + + // Start weak-reference discovery. + ReferenceProcessor* rp = g1h->ref_processor(); + rp->verify_no_references_recorded(); + rp->enable_discovery(); // enable ("weak") refs discovery + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); + satb_mq_set.set_active_all_threads(true); + + // update_g1_committed() will be called at the end of an evac pause + // when marking is on. So, it's also called at the end of the + // initial-mark pause to update the heap end, if the heap expands + // during it. No need to call it here. + + guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); + + size_t max_marking_threads = + MAX2((size_t) 1, parallel_marking_threads()); + for (int i = 0; i < (int)_max_task_num; ++i) { + _tasks[i]->enable_co_tracker(); + if (i < (int) max_marking_threads) + _tasks[i]->reset_co_tracker(marking_task_overhead()); + else + _tasks[i]->reset_co_tracker(0.0); + } +} + +// Checkpoint the roots into this generation from outside +// this generation. [Note this initial checkpoint need only +// be approximate -- we'll do a catch up phase subsequently.] +void ConcurrentMark::checkpointRootsInitial() { + assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // If there has not been a GC[n-1] since last GC[n] cycle completed, + // precede our marking with a collection of all + // younger generations to keep floating garbage to a minimum. + // YSR: we won't do this for now -- it's an optimization to be + // done post-beta. 
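checkpointRootsInitialPost() above turns on SATB (snapshot-at-the-beginning) buffering for all Java threads via satb_mq_set.set_active_all_threads(true). The following is a minimal standalone sketch of the idea behind an SATB pre-write barrier, not the HotSpot barrier itself; the types Obj and SatbQueue and the functions satb_pre_write()/reference_store() are invented for the illustration.

#include <vector>

// Snapshot-at-the-beginning in one sentence: before a reference field is
// overwritten while marking is active, the *old* value is recorded so the
// concurrent marker can still trace the object graph as it existed when
// marking started.  Everything below is illustrative only.
struct Obj { Obj* field = nullptr; };

struct SatbQueue {
  std::vector<Obj*> buffer;                 // stand-in for a per-thread SATB buffer
  void enqueue(Obj* old_value) {
    if (old_value != nullptr) buffer.push_back(old_value);
  }
};

static bool marking_active = false;         // true between initial mark and remark
static thread_local SatbQueue satb_queue;   // the patch keeps these per JavaThread

// Pre-write barrier: called just before "*field = new_value".
inline void satb_pre_write(Obj** field) {
  if (marking_active) satb_queue.enqueue(*field);
}

inline void reference_store(Obj** field, Obj* new_value) {
  satb_pre_write(field);
  *field = new_value;                       // the actual store
}

int main() {
  Obj a, b, holder;
  holder.field = &a;
  marking_active = true;
  reference_store(&holder.field, &b);       // the old target (&a) is enqueued
  return (int) satb_queue.buffer.size();    // 1
}

In the patch the buffers live in each JavaThread's SATB mark queue set and are drained via drainAllSATBBuffers() further below.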
+ + // YSR: ignoring weak refs for now; will do at bug fixing stage + // EVM: assert(discoveredRefsAreClear()); + + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_init_start(); + checkpointRootsInitialPre(); + + // YSR: when concurrent precleaning is in place, we'll + // need to clear the cached card table here + + ResourceMark rm; + HandleMark hm; + + g1h->ensure_parsability(false); + g1h->perm_gen()->save_marks(); + + CMMarkRootsClosure notOlder(this, g1h, false); + CMMarkRootsClosure older(this, g1h, true); + + g1h->set_marking_started(); + g1h->rem_set()->prepare_for_younger_refs_iterate(false); + + g1h->process_strong_roots(false, // fake perm gen collection + SharedHeap::SO_AllClasses, + ¬Older, // Regular roots + &older // Perm Gen Roots + ); + checkpointRootsInitialPost(); + + // Statistics. + double end = os::elapsedTime(); + _init_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + g1p->record_concurrent_mark_init_end(); +} + +/* + Notice that in the next two methods, we actually leave the STS + during the barrier sync and join it immediately afterwards. If we + do not do this, this then the following deadlock can occur: one + thread could be in the barrier sync code, waiting for the other + thread to also sync up, whereas another one could be trying to + yield, while also waiting for the other threads to sync up too. + + Because the thread that does the sync barrier has left the STS, it + is possible to be suspended for a Full GC or an evacuation pause + could occur. This is actually safe, since the entering the sync + barrier is one of the last things do_marking_step() does, and it + doesn't manipulate any data structures afterwards. +*/ + +void ConcurrentMark::enter_first_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering first barrier", task_num); + + ConcurrentGCThread::stsLeave(); + _first_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everyone should have synced up and not be doing any + // more work + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); + + // let task 0 do this + if (task_num == 0) { + // task 0 is responsible for clearing the global data structures + clear_marking_state(); + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); + } + } + + // after this, each task should reset its own data structures then + // then go into the second barrier +} + +void ConcurrentMark::enter_second_sync_barrier(int task_num) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] entering second barrier", task_num); + + ConcurrentGCThread::stsLeave(); + _second_overflow_barrier_sync.enter(); + ConcurrentGCThread::stsJoin(); + // at this point everything should be re-initialised and ready to go + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); +} + +void ConcurrentMark::grayRoot(oop p) { + HeapWord* addr = (HeapWord*) p; + // We can't really check against _heap_start and _heap_end, since it + // is possible during an evacuation pause with piggy-backed + // initial-mark that the committed space is expanded during the + // pause without CM observing this change. So the assertions below + // is a bit conservative; but better than nothing. 
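The two overflow barriers used by enter_first_sync_barrier()/enter_second_sync_barrier() above implement a restart protocol: all tasks rendezvous, task 0 clears the global marking state, every task resets its local state, and a second rendezvous guarantees nobody restarts early. A rough standalone sketch of that shape using C++20 std::barrier follows; the thread bodies and counts are invented and this is not the HotSpot synchronisation code.

#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

// Illustration of the two-barrier overflow restart described above.  Task 0
// clears shared marking state between the barriers; the second barrier keeps
// any task from restarting before that reset has finished.
int main() {
  const int n_tasks = 4;
  std::barrier<> first_sync(n_tasks);
  std::barrier<> second_sync(n_tasks);

  auto task = [&](int id) {
    // ... this task noticed (or was told about) a global mark stack overflow ...
    first_sync.arrive_and_wait();        // everyone has stopped marking
    if (id == 0) {
      std::printf("[task 0] clearing global marking state\n");
    }
    // each task re-initialises its own local structures here
    second_sync.arrive_and_wait();       // reset is complete for everyone
    std::printf("[task %d] restarting marking\n", id);
  };

  std::vector<std::thread> threads;
  for (int i = 0; i < n_tasks; i++) threads.emplace_back(task, i);
  for (std::thread& t : threads) t.join();
  return 0;
}

The extra wrinkle in the patch is that each thread leaves the suspendible thread set (stsLeave()) before blocking on the barrier, so a safepoint or Full GC is not held up by tasks parked here.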
+ tmp_guarantee_CM( _g1h->g1_committed().contains(addr), + "address should be within the heap bounds" ); + + if (!_nextMarkBitMap->isMarked(addr)) + _nextMarkBitMap->parMark(addr); +} + +void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { + // The objects on the region have already been marked "in bulk" by + // the caller. We only need to decide whether to push the region on + // the region stack or not. + + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // We're done with marking and waiting for remark. We do not need to + // push anything else on the region stack. + return; + + HeapWord* finger = _finger; + + if (verbose_low()) + gclog_or_tty->print_cr("[global] attempting to push " + "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " + PTR_FORMAT, mr.start(), mr.end(), finger); + + if (mr.start() < finger) { + // The finger is always heap region aligned and it is not possible + // for mr to span heap regions. + tmp_guarantee_CM( mr.end() <= finger, "invariant" ); + + tmp_guarantee_CM( mr.start() <= mr.end() && + _heap_start <= mr.start() && + mr.end() <= _heap_end, + "region boundaries should fall within the committed space" ); + if (verbose_low()) + gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " + "below the finger, pushing it", + mr.start(), mr.end()); + + if (!region_stack_push(mr)) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] region stack has overflown."); + } + } +} + +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { + // The object is not marked by the caller. We need to at least mark + // it and maybe push in on the stack. + + HeapWord* addr = (HeapWord*)p; + if (!_nextMarkBitMap->isMarked(addr)) { + // We definitely need to mark it, irrespective whether we bail out + // because we're done with marking. + if (_nextMarkBitMap->parMark(addr)) { + if (!concurrent_marking_in_progress() || !_should_gray_objects) + // If we're done with concurrent marking and we're waiting for + // remark, then we're not pushing anything on the stack. + return; + + // No OrderAccess:store_load() is needed. 
It is implicit in the + // CAS done in parMark(addr) above + HeapWord* finger = _finger; + + if (addr < finger) { + if (!mark_stack_push(oop(addr))) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] global stack overflow " + "during parMark"); + } + } + } + } +} + +class CMConcurrentMarkingTask: public AbstractGangTask { +private: + ConcurrentMark* _cm; + ConcurrentMarkThread* _cmt; + +public: + void work(int worker_i) { + guarantee( Thread::current()->is_ConcurrentGC_thread(), + "this should only be done by a conc GC thread" ); + + double start_vtime = os::elapsedVTime(); + + ConcurrentGCThread::stsJoin(); + + guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" ); + CMTask* the_task = _cm->task(worker_i); + the_task->start_co_tracker(); + the_task->record_start_time(); + if (!_cm->has_aborted()) { + do { + double start_vtime_sec = os::elapsedVTime(); + double start_time_sec = os::elapsedTime(); + the_task->do_marking_step(10.0); + double end_time_sec = os::elapsedTime(); + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + double elapsed_time_sec = end_time_sec - start_time_sec; + _cm->clear_has_overflown(); + + bool ret = _cm->do_yield_check(worker_i); + + jlong sleep_time_ms; + if (!_cm->has_aborted() && the_task->has_aborted()) { + sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); + ConcurrentGCThread::stsLeave(); + os::sleep(Thread::current(), sleep_time_ms, false); + ConcurrentGCThread::stsJoin(); + } + double end_time2_sec = os::elapsedTime(); + double elapsed_time2_sec = end_time2_sec - start_time_sec; + + the_task->update_co_tracker(); + +#if 0 + gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + the_task->conc_overhead(os::elapsedTime()) * 8.0); + gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", + elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); +#endif + } while (!_cm->has_aborted() && the_task->has_aborted()); + } + the_task->record_end_time(); + guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" ); + + ConcurrentGCThread::stsLeave(); + + double end_vtime = os::elapsedVTime(); + the_task->update_co_tracker(true); + _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); + } + + CMConcurrentMarkingTask(ConcurrentMark* cm, + ConcurrentMarkThread* cmt) : + AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } + + ~CMConcurrentMarkingTask() { } +}; + +void ConcurrentMark::markFromRoots() { + // we might be tempted to assert that: + // assert(asynch == !SafepointSynchronize::is_at_safepoint(), + // "inconsistent argument?"); + // However that wouldn't be right, because it's possible that + // a safepoint is indeed in progress as a younger generation + // stop-the-world GC happens even as we mark in this generation. + + _restart_for_overflow = false; + + set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); + + CMConcurrentMarkingTask markingTask(this, cmThread()); + if (parallel_marking_threads() > 0) + _parallel_workers->run_task(&markingTask); + else + markingTask.work(0); + print_stats(); +} + +void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. 
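The sleep in CMConcurrentMarkingTask::work() above is sized as elapsed_vtime_sec * sleep_factor, where sleep_factor comes out of the ConcurrentMark constructor when G1MarkingOverheadPerc is the governing knob. The standalone sketch below just restates that arithmetic with made-up input values so the relationship between target overhead, thread count and sleep time is visible in one place; the variable names mirror the constructor but none of this is HotSpot code.

#include <cmath>
#include <cstdio>

// Re-statement of the overhead -> (marking threads, sleep factor) arithmetic
// from the ConcurrentMark constructor.  All input values are invented.
int main() {
  const double marking_overhead_perc = 10.0;   // stand-in for G1MarkingOverheadPerc
  const double max_pause_time_ms     = 50.0;   // stand-in for G1MaxPauseTimeMS
  const double time_slice_ms         = 100.0;  // stand-in for G1TimeSliceMS
  const int    processors            = 8;

  double marking_overhead    = marking_overhead_perc / 100.0;
  // Marking overhead target, scaled by the pause-time / time-slice ratio.
  double overall_cm_overhead = max_pause_time_ms * marking_overhead / time_slice_ms;
  double cpu_ratio           = 1.0 / (double) processors;

  // Number of marking threads needed to spread that overhead over whole CPUs.
  double marking_thread_num  = std::ceil(overall_cm_overhead / cpu_ratio);
  // Fraction of one CPU each marking thread is allowed to consume.
  double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * (double) processors;
  // For every second of marking work, sleep sleep_factor seconds.
  double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;

  // The work loop above then computes:
  double elapsed_vtime_sec = 0.010;            // say, 10 ms of marking just done
  double sleep_time_ms     = elapsed_vtime_sec * sleep_factor * 1000.0;

  std::printf("threads = %.0f, duty cycle = %.3f, sleep factor = %.3f, "
              "sleep after 10 ms of work = %.1f ms\n",
              marking_thread_num, marking_task_overhead, sleep_factor,
              sleep_time_ms);
  return 0;
}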
+ if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + G1CollectorPolicy* g1p = g1h->g1_policy(); + g1p->record_concurrent_mark_remark_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + checkpointRootsFinalWork(); + + double mark_work_end = os::elapsedTime(); + + weakRefsWork(clear_all_soft_refs); + + if (has_overflown()) { + // Oops. We overflowed. Restart concurrent marking. + _restart_for_overflow = true; + // Clear the flag. We do not need it any more. + clear_has_overflown(); + if (G1TraceMarkStackOverflow) + gclog_or_tty->print_cr("\nRemark led to restart for overflow."); + } else { + // We're done with marking. + JavaThread::satb_mark_queue_set().set_active_all_threads(false); + } + +#if VERIFY_OBJS_PROCESSED + _scan_obj_cl.objs_processed = 0; + ThreadLocalObjQueue::objs_enqueued = 0; +#endif + + // Statistics + double now = os::elapsedTime(); + _remark_mark_times.add((mark_work_end - start) * 1000.0); + _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); + _remark_times.add((now - start) * 1000.0); + + GCOverheadReporter::recordSTWEnd(now); + for (int i = 0; i < (int)_max_task_num; ++i) + _tasks[i]->disable_co_tracker(); + _cleanup_co_tracker.enable(); + _cleanup_co_tracker.reset(cleanup_task_overhead()); + g1p->record_concurrent_mark_remark_end(); +} + + +#define CARD_BM_TEST_MODE 0 + +class CalcLiveObjectsClosure: public HeapRegionClosure { + + CMBitMapRO* _bm; + ConcurrentMark* _cm; + COTracker* _co_tracker; + bool _changed; + bool _yield; + size_t _words_done; + size_t _tot_live; + size_t _tot_used; + size_t _regions_done; + double _start_vtime_sec; + + BitMap* _region_bm; + BitMap* _card_bm; + intptr_t _bottom_card_num; + bool _final; + + void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { + for (intptr_t i = start_card_num; i <= last_card_num; i++) { +#if CARD_BM_TEST_MODE + guarantee(_card_bm->at(i - _bottom_card_num), + "Should already be set."); +#else + _card_bm->par_at_put(i - _bottom_card_num, 1); +#endif + } + } + +public: + CalcLiveObjectsClosure(bool final, + CMBitMapRO *bm, ConcurrentMark *cm, + BitMap* region_bm, BitMap* card_bm, + COTracker* co_tracker) : + _bm(bm), _cm(cm), _changed(false), _yield(true), + _words_done(0), _tot_live(0), _tot_used(0), + _region_bm(region_bm), _card_bm(card_bm), + _final(final), _co_tracker(co_tracker), + _regions_done(0), _start_vtime_sec(0.0) + { + _bottom_card_num = + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> + CardTableModRefBS::card_shift); + } + + bool doHeapRegion(HeapRegion* hr) { + if (_co_tracker != NULL) + _co_tracker->update(); + + if (!_final && _regions_done == 0) + _start_vtime_sec = os::elapsedVTime(); + + if (hr->continuesHumongous()) return false; + + HeapWord* nextTop = hr->next_top_at_mark_start(); + HeapWord* start = hr->top_at_conc_mark_count(); + assert(hr->bottom() <= start && start <= hr->end() && + hr->bottom() <= nextTop && nextTop <= hr->end() && + start <= nextTop, + "Preconditions."); + // Otherwise, record the number of word's we'll examine. + size_t words_done = (nextTop - start); + // Find the first marked object at or after "start". + start = _bm->getNextMarkedWordAddress(start, nextTop); + size_t marked_bytes = 0; + + // Below, the term "card num" means the result of shifting an address + // by the card shift -- address 0 corresponds to card number 0. 
One + // must subtract the card num of the bottom of the heap to obtain a + // card table index. + // The first card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t start_card_num = -1; + // The last card num of the sequence of live cards currently being + // constructed. -1 ==> no sequence. + intptr_t last_card_num = -1; + + while (start < nextTop) { + if (_yield && _cm->do_yield_check()) { + // We yielded. It might be for a full collection, in which case + // all bets are off; terminate the traversal. + if (_cm->has_aborted()) { + _changed = false; + return true; + } else { + // Otherwise, it might be a collection pause, and the region + // we're looking at might be in the collection set. We'll + // abandon this region. + return false; + } + } + oop obj = oop(start); + int obj_sz = obj->size(); + // The card num of the start of the current object. + intptr_t obj_card_num = + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); + + HeapWord* obj_last = start + obj_sz - 1; + intptr_t obj_last_card_num = + intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); + + if (obj_card_num != last_card_num) { + if (start_card_num == -1) { + assert(last_card_num == -1, "Both or neither."); + start_card_num = obj_card_num; + } else { + assert(last_card_num != -1, "Both or neither."); + assert(obj_card_num >= last_card_num, "Inv"); + if ((obj_card_num - last_card_num) > 1) { + // Mark the last run, and start a new one. + mark_card_num_range(start_card_num, last_card_num); + start_card_num = obj_card_num; + } + } +#if CARD_BM_TEST_MODE + /* + gclog_or_tty->print_cr("Setting bits from %d/%d.", + obj_card_num - _bottom_card_num, + obj_last_card_num - _bottom_card_num); + */ + for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { + _card_bm->par_at_put(j - _bottom_card_num, 1); + } +#endif + } + // In any case, we set the last card num. + last_card_num = obj_last_card_num; + + marked_bytes += obj_sz * HeapWordSize; + // Find the next marked object after this one. + start = _bm->getNextMarkedWordAddress(start + 1, nextTop); + _changed = true; + } + // Handle the last range, if any. + if (start_card_num != -1) + mark_card_num_range(start_card_num, last_card_num); + if (_final) { + // Mark the allocated-since-marking portion... + HeapWord* tp = hr->top(); + if (nextTop < tp) { + start_card_num = + intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); + last_card_num = + intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); + mark_card_num_range(start_card_num, last_card_num); + // This definitely means the region has live objects. + _region_bm->par_at_put(hr->hrs_index(), 1); + } + } + + hr->add_to_marked_bytes(marked_bytes); + // Update the live region bitmap. 
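The card bookkeeping in the loop above is a shift plus an offset subtraction: card_num(addr) = addr >> card_shift, and the bit to set in _card_bm is card_num(addr) - card_num(heap_bottom). A tiny self-contained example of that mapping follows; the card shift, heap base and object size are illustrative only (the real values come from CardTableModRefBS and the reserved heap region).

#include <cstdint>
#include <cstdio>

// Worked example of the card-index arithmetic used by CalcLiveObjectsClosure:
//   card_num(addr)     = addr >> card_shift        (card 0 is address 0)
//   card bitmap index  = card_num(addr) - card_num(heap_bottom)
int main() {
  const int       card_shift  = 9;               // 512-byte cards, for the example
  const uintptr_t heap_bottom = 0x40000000u;     // made-up heap base

  const intptr_t bottom_card_num = (intptr_t)(heap_bottom >> card_shift);

  // An object starting 1000 bytes into the heap and occupying 2000 bytes:
  uintptr_t obj_start = heap_bottom + 1000;
  uintptr_t obj_last  = obj_start + 2000 - 1;    // last byte of the object

  intptr_t first_card = (intptr_t)(obj_start >> card_shift);
  intptr_t last_card  = (intptr_t)(obj_last  >> card_shift);

  // These are exactly the bits mark_card_num_range() would set in _card_bm.
  for (intptr_t c = first_card; c <= last_card; c++) {
    std::printf("set card bitmap index %ld\n", (long)(c - bottom_card_num));
  }
  return 0;
}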
+ if (marked_bytes > 0) { + _region_bm->par_at_put(hr->hrs_index(), 1); + } + hr->set_top_at_conc_mark_count(nextTop); + _tot_live += hr->next_live_bytes(); + _tot_used += hr->used(); + _words_done = words_done; + + if (!_final) { + ++_regions_done; + if (_regions_done % 10 == 0) { + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; + if (elapsed_vtime_sec > (10.0 / 1000.0)) { + jlong sleep_time_ms = + (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); +#if 0 + gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, " + "overhead %1.4lf", + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, + _co_tracker->concOverhead(os::elapsedTime())); +#endif + os::sleep(Thread::current(), sleep_time_ms, false); + _start_vtime_sec = end_vtime_sec; + } + } + } + + return false; + } + + bool changed() { return _changed; } + void reset() { _changed = false; _words_done = 0; } + void no_yield() { _yield = false; } + size_t words_done() { return _words_done; } + size_t tot_live() { return _tot_live; } + size_t tot_used() { return _tot_used; } +}; + + +void ConcurrentMark::calcDesiredRegions() { + guarantee( _cleanup_co_tracker.enabled(), "invariant" ); + _cleanup_co_tracker.start(); + + _region_bm.clear(); + _card_bm.clear(); + CalcLiveObjectsClosure calccl(false /*final*/, + nextMarkBitMap(), this, + &_region_bm, &_card_bm, + &_cleanup_co_tracker); + G1CollectedHeap *g1h = G1CollectedHeap::heap(); + g1h->heap_region_iterate(&calccl); + + do { + calccl.reset(); + g1h->heap_region_iterate(&calccl); + } while (calccl.changed()); + + _cleanup_co_tracker.update(true); +} + +class G1ParFinalCountTask: public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + CMBitMap* _bm; + size_t _n_workers; + size_t *_live_bytes; + size_t *_used_bytes; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, + BitMap* region_bm, BitMap* card_bm) : + AbstractGangTask("G1 final counting"), _g1h(g1h), + _bm(bm), _region_bm(region_bm), _card_bm(card_bm) + { + if (ParallelGCThreads > 0) + _n_workers = _g1h->workers()->total_workers(); + else + _n_workers = 1; + _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); + } + + ~G1ParFinalCountTask() { + FREE_C_HEAP_ARRAY(size_t, _live_bytes); + FREE_C_HEAP_ARRAY(size_t, _used_bytes); + } + + void work(int i) { + CalcLiveObjectsClosure calccl(true /*final*/, + _bm, _g1h->concurrent_mark(), + _region_bm, _card_bm, + NULL /* CO tracker */); + calccl.no_yield(); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&calccl, i, 1); + } else { + _g1h->heap_region_iterate(&calccl); + } + assert(calccl.complete(), "Shouldn't have yielded!"); + + guarantee( (size_t)i < _n_workers, "invariant" ); + _live_bytes[i] = calccl.tot_live(); + _used_bytes[i] = calccl.tot_used(); + } + size_t live_bytes() { + size_t live_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + live_bytes += _live_bytes[i]; + return live_bytes; + } + size_t used_bytes() { + size_t used_bytes = 0; + for (size_t i = 0; i < _n_workers; ++i) + used_bytes += _used_bytes[i]; + return used_bytes; + } +}; + +class G1ParNoteEndTask; + +class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { + G1CollectedHeap* _g1; + int _worker_num; + size_t _max_live_bytes; + size_t _regions_claimed; + size_t _freed_bytes; + size_t _cleared_h_regions; + size_t _freed_regions; + UncleanRegionList* _unclean_region_list; + double 
_claimed_region_time; + double _max_region_time; + +public: + G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num); + size_t freed_bytes() { return _freed_bytes; } + size_t cleared_h_regions() { return _cleared_h_regions; } + size_t freed_regions() { return _freed_regions; } + UncleanRegionList* unclean_region_list() { + return _unclean_region_list; + } + + bool doHeapRegion(HeapRegion *r); + + size_t max_live_bytes() { return _max_live_bytes; } + size_t regions_claimed() { return _regions_claimed; } + double claimed_region_time_sec() { return _claimed_region_time; } + double max_region_time_sec() { return _max_region_time; } +}; + +class G1ParNoteEndTask: public AbstractGangTask { + friend class G1NoteEndOfConcMarkClosure; +protected: + G1CollectedHeap* _g1h; + size_t _max_live_bytes; + size_t _freed_bytes; + ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state; +public: + G1ParNoteEndTask(G1CollectedHeap* g1h, + ConcurrentMark::ParCleanupThreadState** + par_cleanup_thread_state) : + AbstractGangTask("G1 note end"), _g1h(g1h), + _max_live_bytes(0), _freed_bytes(0), + _par_cleanup_thread_state(par_cleanup_thread_state) + {} + + void work(int i) { + double start = os::elapsedTime(); + G1NoteEndOfConcMarkClosure g1_note_end(_g1h, + &_par_cleanup_thread_state[i]->list, + i); + if (ParallelGCThreads > 0) { + _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 2); + } else { + _g1h->heap_region_iterate(&g1_note_end); + } + assert(g1_note_end.complete(), "Shouldn't have yielded!"); + + // Now finish up freeing the current thread's regions. + _g1h->finish_free_region_work(g1_note_end.freed_bytes(), + g1_note_end.cleared_h_regions(), + 0, NULL); + { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + _max_live_bytes += g1_note_end.max_live_bytes(); + _freed_bytes += g1_note_end.freed_bytes(); + } + double end = os::elapsedTime(); + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " + "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", + i, start, end, (end-start)*1000.0, + g1_note_end.regions_claimed(), + g1_note_end.claimed_region_time_sec()*1000.0, + g1_note_end.max_region_time_sec()*1000.0); + } + } + size_t max_live_bytes() { return _max_live_bytes; } + size_t freed_bytes() { return _freed_bytes; } +}; + +class G1ParScrubRemSetTask: public AbstractGangTask { +protected: + G1RemSet* _g1rs; + BitMap* _region_bm; + BitMap* _card_bm; +public: + G1ParScrubRemSetTask(G1CollectedHeap* g1h, + BitMap* region_bm, BitMap* card_bm) : + AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), + _region_bm(region_bm), _card_bm(card_bm) + {} + + void work(int i) { + if (ParallelGCThreads > 0) { + _g1rs->scrub_par(_region_bm, _card_bm, i, 3); + } else { + _g1rs->scrub(_region_bm, _card_bm); + } + } + +}; + +G1NoteEndOfConcMarkClosure:: +G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, + UncleanRegionList* list, + int worker_num) + : _g1(g1), _worker_num(worker_num), + _max_live_bytes(0), _regions_claimed(0), + _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), + _claimed_region_time(0.0), _max_region_time(0.0), + _unclean_region_list(list) +{} + +bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { + // We use a claim value of zero here because all regions + // were claimed with value 1 in the FinalCount task. 
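G1ParFinalCountTask, G1ParNoteEndTask and G1ParScrubRemSetTask above all walk the heap regions through heap_region_par_iterate_chunked(), apparently with a distinct claim value per phase (the trailing 1, 2 and 3 arguments), which is what the comment at the start of doHeapRegion() is alluding to. Below is a stripped-down sketch of claim-value based work claiming, with std::atomic standing in for HotSpot's primitives; Region and par_iterate are invented names, and the real iteration also hands regions out in chunks rather than one at a time.

#include <atomic>
#include <vector>

// Claim-value idea: each parallel phase picks a fresh value; a worker may
// process a region only if it is the one whose CAS moves the region's claim
// field to that value.  Using a new value per phase means nobody has to reset
// the fields in between.  Simplified illustration only.
struct Region {
  std::atomic<int> claim{0};
};

template <typename Fn>
void par_iterate(std::vector<Region>& regions, int new_claim, Fn process) {
  for (Region& r : regions) {
    int seen = r.claim.load(std::memory_order_relaxed);
    if (seen == new_claim) continue;                  // another worker owns it
    if (r.claim.compare_exchange_strong(seen, new_claim)) {
      process(r);                                     // this worker owns it now
    }
    // On CAS failure some other worker claimed the region first; move on.
  }
}

int main() {
  std::vector<Region> regions(8);
  int processed = 0;
  par_iterate(regions, /* new_claim = */ 1, [&](Region&) { ++processed; });
  return processed == 8 ? 0 : 1;                      // single-threaded demo
}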
+ r->reset_gc_time_stamp(); + if (!r->continuesHumongous()) { + double start = os::elapsedTime(); + _regions_claimed++; + r->note_end_of_marking(); + _max_live_bytes += r->max_live_bytes(); + _g1->free_region_if_totally_empty_work(r, + _freed_bytes, + _cleared_h_regions, + _freed_regions, + _unclean_region_list, + true /*par*/); + double region_time = (os::elapsedTime() - start); + _claimed_region_time += region_time; + if (region_time > _max_region_time) _max_region_time = region_time; + } + return false; +} + +void ConcurrentMark::cleanup() { + // world is stopped at this checkpoint + assert(SafepointSynchronize::is_at_safepoint(), + "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If a full collection has happened, we shouldn't do this. + if (has_aborted()) { + g1h->set_marking_complete(); // So bitmap clearing isn't confused + return; + } + + _cleanup_co_tracker.disable(); + + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); + g1p->record_concurrent_mark_cleanup_start(); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + + // Do counting once more with the world stopped for good measure. + G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), + &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_count_task); + g1h->set_par_threads(0); + } else { + g1_par_count_task.work(0); + } + + size_t known_garbage_bytes = + g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); +#if 0 + gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", + (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), + (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), + (double) known_garbage_bytes / (double) (1024 * 1024)); +#endif // 0 + g1p->set_known_garbage_bytes(known_garbage_bytes); + + size_t start_used_bytes = g1h->used(); + _at_least_one_mark_complete = true; + g1h->set_marking_complete(); + + double count_end = os::elapsedTime(); + double this_final_counting_time = (count_end - start); + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr("Cleanup:"); + gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", + this_final_counting_time*1000.0); + } + _total_counting_time += this_final_counting_time; + + // Install newly created mark bitMap as "prev". + swapMarkBitMaps(); + + g1h->reset_gc_time_stamp(); + + // Note end of marking in all heap regions. + double note_end_start = os::elapsedTime(); + G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_note_end_task); + g1h->set_par_threads(0); + } else { + g1_par_note_end_task.work(0); + } + g1h->set_unclean_regions_coming(true); + double note_end_end = os::elapsedTime(); + // Tell the mutators that there might be unclean regions coming... + if (G1PrintParCleanupStats) { + gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", + (note_end_end - note_end_start)*1000.0); + } + + // Now we "scrub" remembered sets. Note that we must do this before the + // call below, since it affects the metric by which we sort the heap + // regions. 
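One design point worth noting in the counting above: G1ParFinalCountTask gives every worker its own slot in _live_bytes/_used_bytes and only sums the slots after the parallel phase, so no synchronisation is needed while counting, and cleanup() then derives known_garbage_bytes as used minus live. A minimal sketch of that accumulate-then-reduce pattern; CountResults and the numbers are invented.

#include <cstddef>
#include <cstdio>
#include <vector>

// Per-worker accumulation: worker i writes only slot i during the parallel
// phase; totals are reduced single-threaded afterwards.  Illustration only.
struct CountResults {
  std::vector<size_t> live_bytes;
  std::vector<size_t> used_bytes;
  explicit CountResults(size_t n_workers)
      : live_bytes(n_workers, 0), used_bytes(n_workers, 0) {}

  void record(size_t worker, size_t live, size_t used) {
    live_bytes[worker] += live;       // no locking: the slot is private to the worker
    used_bytes[worker] += used;
  }

  size_t total_live() const {
    size_t sum = 0;
    for (size_t v : live_bytes) sum += v;
    return sum;
  }
  size_t total_used() const {
    size_t sum = 0;
    for (size_t v : used_bytes) sum += v;
    return sum;
  }
};

int main() {
  CountResults counts(2);
  counts.record(0, 100 * 1024, 160 * 1024);   // worker 0's regions
  counts.record(1, 300 * 1024, 400 * 1024);   // worker 1's regions
  std::printf("known garbage = %zu bytes\n",
              counts.total_used() - counts.total_live());
  return 0;
}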
+ if (G1ScrubRemSets) { + double rs_scrub_start = os::elapsedTime(); + G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); + if (ParallelGCThreads > 0) { + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&g1_par_scrub_rs_task); + g1h->set_par_threads(0); + } else { + g1_par_scrub_rs_task.work(0); + } + + double rs_scrub_end = os::elapsedTime(); + double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); + _total_rs_scrub_time += this_rs_scrub_time; + } + + // this will also free any regions totally full of garbage objects, + // and sort the regions. + g1h->g1_policy()->record_concurrent_mark_cleanup_end( + g1_par_note_end_task.freed_bytes(), + g1_par_note_end_task.max_live_bytes()); + + // Statistics. + double end = os::elapsedTime(); + _cleanup_times.add((end - start) * 1000.0); + GCOverheadReporter::recordSTWEnd(end); + + // G1CollectedHeap::heap()->print(); + // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", + // G1CollectedHeap::heap()->get_gc_time_stamp()); + + if (PrintGC || PrintGCDetails) { + g1h->print_size_transition(gclog_or_tty, + start_used_bytes, + g1h->used(), + g1h->capacity()); + } + + size_t cleaned_up_bytes = start_used_bytes - g1h->used(); + g1p->decrease_known_garbage_bytes(cleaned_up_bytes); + + // We need to make this be a "collection" so any collection pause that + // races with it goes around and waits for completeCleanup to finish. + g1h->increment_total_collections(); + +#ifndef PRODUCT + if (G1VerifyConcMark) { + G1CollectedHeap::heap()->prepare_for_verify(); + G1CollectedHeap::heap()->verify(true,false); + } +#endif +} + +void ConcurrentMark::completeCleanup() { + // A full collection intervened. + if (has_aborted()) return; + + int first = 0; + int last = (int)MAX2(ParallelGCThreads, (size_t)1); + for (int t = 0; t < last; t++) { + UncleanRegionList* list = &_par_cleanup_thread_state[t]->list; + assert(list->well_formed(), "Inv"); + HeapRegion* hd = list->hd(); + while (hd != NULL) { + // Now finish up the other stuff. + hd->rem_set()->clear(); + HeapRegion* next_hd = hd->next_from_unclean_list(); + (void)list->pop(); + guarantee(list->hd() == next_hd, "how not?"); + _g1h->put_region_on_unclean_list(hd); + if (!hd->isHumongous()) { + // Add this to the _free_regions count by 1. 
+ _g1h->finish_free_region_work(0, 0, 1, NULL); + } + hd = list->hd(); + guarantee(hd == next_hd, "how not?"); + } + } +} + + +class G1CMIsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; + public: + G1CMIsAliveClosure(G1CollectedHeap* g1) : + _g1(g1) + {} + + void do_object(oop obj) { + assert(false, "not to be invoked"); + } + bool do_object_b(oop obj) { + HeapWord* addr = (HeapWord*)obj; + return addr != NULL && + (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); + } +}; + +class G1CMKeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + CMBitMap* _bitMap; + public: + G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, + CMBitMap* bitMap) : + _g1(g1), _cm(cm), + _bitMap(bitMap) {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop thisOop = *p; + HeapWord* addr = (HeapWord*)thisOop; + if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { + _bitMap->mark(addr); + _cm->mark_stack_push(thisOop); + } + } +}; + +class G1CMDrainMarkingStackClosure: public VoidClosure { + CMMarkStack* _markStack; + CMBitMap* _bitMap; + G1CMKeepAliveClosure* _oopClosure; + public: + G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, + G1CMKeepAliveClosure* oopClosure) : + _bitMap(bitMap), + _markStack(markStack), + _oopClosure(oopClosure) + {} + + void do_void() { + _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); + } +}; + +void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { + ResourceMark rm; + HandleMark hm; + ReferencePolicy* soft_ref_policy; + + // Process weak references. + if (clear_all_soft_refs) { + soft_ref_policy = new AlwaysClearPolicy(); + } else { +#ifdef COMPILER2 + soft_ref_policy = new LRUMaxHeapPolicy(); +#else + soft_ref_policy = new LRUCurrentHeapPolicy(); +#endif + } + assert(_markStack.isEmpty(), "mark stack should be empty"); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + G1CMIsAliveClosure g1IsAliveClosure(g1); + + G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap()); + G1CMDrainMarkingStackClosure + g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, + &g1KeepAliveClosure); + + // XXXYYY Also: copy the parallel ref processing code from CMS. + ReferenceProcessor* rp = g1->ref_processor(); + rp->process_discovered_references(soft_ref_policy, + &g1IsAliveClosure, + &g1KeepAliveClosure, + &g1DrainMarkingStackClosure, + NULL); + assert(_markStack.overflow() || _markStack.isEmpty(), + "mark stack should be empty (unless it overflowed)"); + if (_markStack.overflow()) { + set_has_overflown(); + } + + rp->enqueue_discovered_references(); + rp->verify_no_references_recorded(); + assert(!rp->discovery_enabled(), "should have been disabled"); + + // Now clean up stale oops in SymbolTable and StringTable + SymbolTable::unlink(&g1IsAliveClosure); + StringTable::unlink(&g1IsAliveClosure); +} + +void ConcurrentMark::swapMarkBitMaps() { + CMBitMapRO* temp = _prevMarkBitMap; + _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; + _nextMarkBitMap = (CMBitMap*) temp; +} + +class CMRemarkTask: public AbstractGangTask { +private: + ConcurrentMark *_cm; + +public: + void work(int worker_i) { + // Since all available tasks are actually started, we should + // only proceed if we're supposed to be actived. 
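For weakRefsWork() above, the shape to keep in mind is that the ReferenceProcessor is driven by three callbacks: an is-alive predicate (G1CMIsAliveClosure, a liveness test against the heap), a keep-alive action (G1CMKeepAliveClosure, mark and push), and a drain action (G1CMDrainMarkingStackClosure) that finishes transitive marking. The sketch below only illustrates that division of labour; the loop, the types and the policy predicate are invented and much simpler than the real discovered-reference protocol.

#include <cstdio>
#include <vector>

// Conceptual sketch of the three roles handed to
// process_discovered_references(): is_alive answers whether a referent
// survived marking on its own, keep_alive marks a referent we decide to
// retain, drain finishes transitive marking from whatever keep_alive pushed.
// All of the code below is invented for the illustration.
struct Referent { bool marked = false; };
struct WeakRef  { Referent* referent; };

template <typename IsAlive, typename KeepAlive, typename Drain, typename Policy>
void process_discovered(std::vector<WeakRef>& refs,
                        IsAlive is_alive, KeepAlive keep_alive,
                        Drain drain, Policy should_keep) {
  for (WeakRef& ref : refs) {
    if (ref.referent == nullptr || is_alive(ref.referent)) {
      continue;                        // referent is reachable anyway
    }
    if (should_keep(ref)) {
      keep_alive(ref.referent);        // e.g. a soft reference the policy retains
    } else {
      ref.referent = nullptr;          // clear it; the Reference is enqueued later
    }
  }
  drain();                             // trace everything keep_alive made live
}

int main() {
  Referent live_one, kept_one, dead_one;
  live_one.marked = true;
  std::vector<WeakRef> refs = { {&live_one}, {&kept_one}, {&dead_one} };

  int kept = 0;
  process_discovered(
      refs,
      [](Referent* r)       { return r->marked; },            // bitmap stand-in
      [&](Referent* r)      { r->marked = true; ++kept; },     // mark + "push"
      []()                  { /* drain the mark stack here */ },
      [&](const WeakRef& r) { return r.referent == &kept_one; });

  std::printf("kept = %d, cleared = %d\n",
              kept, refs[2].referent == nullptr ? 1 : 0);
  return 0;
}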
+ if ((size_t)worker_i < _cm->active_tasks()) { + CMTask* task = _cm->task(worker_i); + task->record_start_time(); + do { + task->do_marking_step(1000000000.0 /* something very large */); + } while (task->has_aborted() && !_cm->has_overflown()); + // If we overflow, then we do not want to restart. We instead + // want to abort remark and do concurrent marking again. + task->record_end_time(); + } + } + + CMRemarkTask(ConcurrentMark* cm) : + AbstractGangTask("Par Remark"), _cm(cm) { } +}; + +void ConcurrentMark::checkpointRootsFinalWork() { + ResourceMark rm; + HandleMark hm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + g1h->ensure_parsability(false); + + if (ParallelGCThreads > 0) { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = ParallelGCThreads; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. + int n_workers = g1h->workers()->total_workers(); + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&remarkTask); + g1h->set_par_threads(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } else { + g1h->change_strong_roots_parity(); + // this is remark, so we'll use up all available threads + int active_workers = 1; + set_phase(active_workers, false); + + CMRemarkTask remarkTask(this); + // We will start all available threads, even if we decide that the + // active_workers will be fewer. The extra ones will just bail out + // immediately. + remarkTask.work(0); + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); + } + + print_stats(); + + if (!restart_for_overflow()) + set_non_marking_state(); + +#if VERIFY_OBJS_PROCESSED + if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { + gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", + _scan_obj_cl.objs_processed, + ThreadLocalObjQueue::objs_enqueued); + guarantee(_scan_obj_cl.objs_processed == + ThreadLocalObjQueue::objs_enqueued, + "Different number of objs processed and enqueued."); + } +#endif +} + +class ReachablePrinterOopClosure: public OopClosure { +private: + G1CollectedHeap* _g1h; + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + const char* str = NULL; + const char* str2 = ""; + + if (!_g1h->is_in_g1_reserved(obj)) + str = "outside G1 reserved"; + else { + HeapRegion* hr = _g1h->heap_region_containing(obj); + guarantee( hr != NULL, "invariant" ); + if (hr->obj_allocated_since_prev_marking(obj)) { + str = "over TAMS"; + if (_bitmap->isMarked((HeapWord*) obj)) + str2 = " AND MARKED"; + } else if (_bitmap->isMarked((HeapWord*) obj)) + str = "marked"; + else + str = "#### NOT MARKED ####"; + } + + _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s", + p, (void*) obj, str, str2); + } +}; + +class ReachablePrinterClosure: public BitMapClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } + + bool 
do_bit(size_t offset) { + HeapWord* addr = _bitmap->offsetToHeapWord(offset); + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset); + oop(addr)->oop_iterate(&oopCl); + _out->print_cr(""); + + return true; + } +}; + +class ObjInRegionReachablePrinterClosure : public ObjectClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + void do_object(oop o) { + ReachablePrinterOopClosure oopCl(_bitmap, _out); + + _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o); + o->oop_iterate(&oopCl); + _out->print_cr(""); + } + + ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +class RegionReachablePrinterClosure : public HeapRegionClosure { +private: + CMBitMapRO* _bitmap; + outputStream* _out; + +public: + bool doHeapRegion(HeapRegion* hr) { + HeapWord* b = hr->bottom(); + HeapWord* e = hr->end(); + HeapWord* t = hr->top(); + HeapWord* p = hr->prev_top_at_mark_start(); + _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " + "PTAMS: "PTR_FORMAT, b, e, t, p); + _out->print_cr(""); + + ObjInRegionReachablePrinterClosure ocl(_bitmap, _out); + hr->object_iterate_mem_careful(MemRegion(p, t), &ocl); + + return false; + } + + RegionReachablePrinterClosure(CMBitMapRO* bitmap, + outputStream* out) : + _bitmap(bitmap), _out(out) { } +}; + +void ConcurrentMark::print_prev_bitmap_reachable() { + outputStream* out = gclog_or_tty; + +#if SEND_HEAP_DUMP_TO_FILE + guarantee(heap_dump_file == NULL, "Protocol"); + char fn_buf[100]; + sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id()); + heap_dump_file = fopen(fn_buf, "w"); + fileStream fstream(heap_dump_file); + out = &fstream; +#endif // SEND_HEAP_DUMP_TO_FILE + + RegionReachablePrinterClosure rcl(_prevMarkBitMap, out); + out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP"); + _g1h->heap_region_iterate(&rcl); + out->print_cr(""); + + ReachablePrinterClosure cl(_prevMarkBitMap, out); + out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP"); + _prevMarkBitMap->iterate(&cl); + out->print_cr(""); + +#if SEND_HEAP_DUMP_TO_FILE + fclose(heap_dump_file); + heap_dump_file = NULL; +#endif // SEND_HEAP_DUMP_TO_FILE +} + +// This note is for drainAllSATBBuffers and the code in between. +// In the future we could reuse a task to do this work during an +// evacuation pause (since now tasks are not active and can be claimed +// during an evacuation pause). This was a late change to the code and +// is currently not being taken advantage of. + +class CMGlobalObjectClosure : public ObjectClosure { +private: + ConcurrentMark* _cm; + +public: + void do_object(oop obj) { + _cm->deal_with_reference(obj); + } + + CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } +}; + +void ConcurrentMark::deal_with_reference(oop obj) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, + (void*) obj); + + + HeapWord* objAddr = (HeapWord*) obj; + if (_g1h->is_in_g1_reserved(objAddr)) { + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (_g1h->is_obj_ill(obj, hr)) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " + "marked", (void*) obj); + + // we need to mark it first + if (_nextMarkBitMap->parMark(objAddr)) { + // No OrderAccess:store_load() is needed. 
It is implicit in the + // CAS done in parMark(objAddr) above + HeapWord* finger = _finger; + if (objAddr < finger) { + if (verbose_high()) + gclog_or_tty->print_cr("[global] below the global finger " + "("PTR_FORMAT"), pushing it", finger); + if (!mark_stack_push(obj)) { + if (verbose_low()) + gclog_or_tty->print_cr("[global] global stack overflow during " + "deal_with_reference"); + } + } + } + } + } +} + +void ConcurrentMark::drainAllSATBBuffers() { + CMGlobalObjectClosure oc(this); + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.set_closure(&oc); + + while (satb_mq_set.apply_closure_to_completed_buffer()) { + if (verbose_medium()) + gclog_or_tty->print_cr("[global] processed an SATB buffer"); + } + + // no need to check whether we should do this, as this is only + // called during an evacuation pause + satb_mq_set.iterate_closure_all_threads(); + + satb_mq_set.set_closure(NULL); + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); +} + +void ConcurrentMark::markPrev(oop p) { + // Note we are overriding the read-only view of the prev map here, via + // the cast. + ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); +} + +void ConcurrentMark::clear(oop p) { + assert(p != NULL && p->is_oop(), "expected an oop"); + HeapWord* addr = (HeapWord*)p; + assert(addr >= _nextMarkBitMap->startWord() || + addr < _nextMarkBitMap->endWord(), "in a region"); + + _nextMarkBitMap->clear(addr); +} + +void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { + // Note we are overriding the read-only view of the prev map here, via + // the cast. + ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); + _nextMarkBitMap->clearRange(mr); +} + +HeapRegion* +ConcurrentMark::claim_region(int task_num) { + // "checkpoint" the finger + HeapWord* finger = _finger; + + // _heap_end will not change underneath our feet; it only changes at + // yield points. + while (finger < _heap_end) { + tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" ); + + // is the gap between reading the finger and doing the CAS too long? + + HeapRegion* curr_region = _g1h->heap_region_containing(finger); + HeapWord* bottom = curr_region->bottom(); + HeapWord* end = curr_region->end(); + HeapWord* limit = curr_region->next_top_at_mark_start(); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT"), " + "limit = "PTR_FORMAT, + task_num, curr_region, bottom, end, limit); + + HeapWord* res = + (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); + if (res == finger) { + // we succeeded + + // notice that _finger == end cannot be guaranteed here since, + // someone else might have moved the finger even further + guarantee( _finger >= end, "the finger should have moved forward" ); + + if (verbose_low()) + gclog_or_tty->print_cr("[%d] we were successful with region = " + PTR_FORMAT, task_num, curr_region); + + if (limit > bottom) { + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " + "returning it ", task_num, curr_region); + return curr_region; + } else { + tmp_guarantee_CM( limit == bottom, + "the region limit should be at bottom" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " + "returning NULL", task_num, curr_region); + // we return NULL and the caller should try calling + // claim_region() again. 
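claim_region() above parcels out work by CAS-ing a single global finger from the start of a region to its end; the loser of the race simply re-reads the finger and retries. Below is a compact sketch of that claiming idea over a flat heap of equally sized regions, with std::atomic standing in for Atomic::cmpxchg_ptr; RegionClaimer and the sizes are invented, and the real code additionally re-derives the region via heap_region_containing(finger) and skips empty regions.

#include <atomic>
#include <cstddef>
#include <utility>

// Finger-based claiming: tasks race to advance a shared pointer by one region;
// whichever CAS succeeds owns the region it stepped over.  Illustration only.
struct RegionClaimer {
  char*              heap_end;
  size_t             region_bytes;
  std::atomic<char*> finger;

  RegionClaimer(char* start, char* end, size_t bytes)
      : heap_end(end), region_bytes(bytes), finger(start) {}

  // Returns [start, end) of the claimed region, or {nullptr, nullptr} when the
  // finger has reached the end of the heap.
  std::pair<char*, char*> claim() {
    char* f = finger.load();
    while (f < heap_end) {
      char* region_end = f + region_bytes;
      if (finger.compare_exchange_strong(f, region_end)) {
        return { f, region_end };        // we won the race for this region
      }
      // compare_exchange_strong reloaded f with the current finger; another
      // task moved it, so retry from the new position.
    }
    return { nullptr, nullptr };
  }
};

int main() {
  static char heap[4096];
  RegionClaimer claimer(heap, heap + sizeof(heap), 1024);
  int claimed = 0;
  while (claimer.claim().first != nullptr) ++claimed;
  return claimed == 4 ? 0 : 1;
}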
+ return NULL; + } + } else { + guarantee( _finger > finger, "the finger should have moved forward" ); + if (verbose_low()) + gclog_or_tty->print_cr("[%d] somebody else moved the finger, " + "global finger = "PTR_FORMAT", " + "our finger = "PTR_FORMAT, + task_num, _finger, finger); + + // read it again + finger = _finger; + } + } + + return NULL; +} + +void ConcurrentMark::oops_do(OopClosure* cl) { + if (_markStack.size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning the global marking stack, " + "size = %d", _markStack.size()); + // we first iterate over the contents of the mark stack... + _markStack.oops_do(cl); + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue((int)i); + + if (queue->size() > 0 && verbose_low()) + gclog_or_tty->print_cr("[global] scanning task queue of task %d, " + "size = %d", i, queue->size()); + + // ...then over the contents of the all the task queues. + queue->oops_do(cl); + } + + // finally, invalidate any entries that in the region stack that + // point into the collection set + if (_regionStack.invalidate_entries_into_cset()) { + // otherwise, any gray objects copied during the evacuation pause + // might not be visited. + guarantee( _should_gray_objects, "invariant" ); + } +} + +void ConcurrentMark::clear_marking_state() { + _markStack.setEmpty(); + _markStack.clear_overflow(); + _regionStack.setEmpty(); + _regionStack.clear_overflow(); + clear_has_overflown(); + _finger = _heap_start; + + for (int i = 0; i < (int)_max_task_num; ++i) { + OopTaskQueue* queue = _task_queues->queue(i); + queue->set_empty(); + } +} + +void ConcurrentMark::print_stats() { + if (verbose_stats()) { + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + for (size_t i = 0; i < _active_tasks; ++i) { + _tasks[i]->print_stats(); + gclog_or_tty->print_cr("---------------------------------------------------------------------"); + } + } +} + +class CSMarkOopClosure: public OopClosure { + friend class CSMarkBitMapClosure; + + G1CollectedHeap* _g1h; + CMBitMap* _bm; + ConcurrentMark* _cm; + oop* _ms; + jint* _array_ind_stack; + int _ms_size; + int _ms_ind; + int _array_increment; + + bool push(oop obj, int arr_ind = 0) { + if (_ms_ind == _ms_size) { + gclog_or_tty->print_cr("Mark stack is full."); + return false; + } + _ms[_ms_ind] = obj; + if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; + _ms_ind++; + return true; + } + + oop pop() { + if (_ms_ind == 0) return NULL; + else { + _ms_ind--; + return _ms[_ms_ind]; + } + } + + bool drain() { + while (_ms_ind > 0) { + oop obj = pop(); + assert(obj != NULL, "Since index was non-zero."); + if (obj->is_objArray()) { + jint arr_ind = _array_ind_stack[_ms_ind]; + objArrayOop aobj = objArrayOop(obj); + jint len = aobj->length(); + jint next_arr_ind = arr_ind + _array_increment; + if (next_arr_ind < len) { + push(obj, next_arr_ind); + } + // Now process this portion of this one. 
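The objArray handling in drain() above deliberately scans big arrays a stride at a time, re-pushing (array, next_index) before working on the current chunk, so the closure never has to walk an entire large array in one uninterrupted step. A small sketch of that chunking pattern over a plain vector-based stack; ArrayTask, the stride and the element visit are all invented stand-ins.

#include <algorithm>
#include <cstdio>
#include <vector>

// Chunked array scanning: pop a task, re-push a continuation for the rest of
// the array, then visit only the current stride.  Mirrors the _array_increment
// logic in CSMarkOopClosure::drain(); simplified for illustration.
struct ArrayTask {
  const std::vector<int>* array;
  int                     start_index;
};

void drain_chunked(std::vector<ArrayTask>& stack, int stride) {
  while (!stack.empty()) {
    ArrayTask task = stack.back();
    stack.pop_back();

    int len  = (int) task.array->size();
    int next = task.start_index + stride;
    if (next < len) {
      stack.push_back({ task.array, next });   // continuation for the remainder
    }
    int lim = std::min(next, len);
    for (int j = task.start_index; j < lim; j++) {
      std::printf("visit [%d] = %d\n", j, (*task.array)[j]);  // stand-in for do_oop()
    }
  }
}

int main() {
  std::vector<int> big(10);
  for (int i = 0; i < (int) big.size(); i++) big[i] = i * i;

  std::vector<ArrayTask> stack;
  stack.push_back({ &big, 0 });
  drain_chunked(stack, /* stride = */ 4);      // visits elements in chunks of 4
  return 0;
}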
+ int lim = MIN2(next_arr_ind, len); + assert(!UseCompressedOops, "This needs to be fixed"); + for (int j = arr_ind; j < lim; j++) { + do_oop(aobj->obj_at_addr(j)); + } + + } else { + obj->oop_iterate(this); + } + if (abort()) return false; + } + return true; + } + +public: + CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : + _g1h(G1CollectedHeap::heap()), + _cm(cm), + _bm(cm->nextMarkBitMap()), + _ms_size(ms_size), _ms_ind(0), + _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), + _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), + _array_increment(MAX2(ms_size/8, 16)) + {} + + ~CSMarkOopClosure() { + FREE_C_HEAP_ARRAY(oop, _ms); + FREE_C_HEAP_ARRAY(jint, _array_ind_stack); + } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + if (obj == NULL) return; + if (obj->is_forwarded()) { + // If the object has already been forwarded, we have to make sure + // that it's marked. So follow the forwarding pointer. Note that + // this does the right thing for self-forwarding pointers in the + // evacuation failure case. + obj = obj->forwardee(); + } + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (hr != NULL) { + if (hr->in_collection_set()) { + if (_g1h->is_obj_ill(obj)) { + _bm->mark((HeapWord*)obj); + if (!push(obj)) { + gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); + set_abort(); + } + } + } else { + // Outside the collection set; we need to gray it + _cm->deal_with_reference(obj); + } + } + } +}; + +class CSMarkBitMapClosure: public BitMapClosure { + G1CollectedHeap* _g1h; + CMBitMap* _bitMap; + ConcurrentMark* _cm; + CSMarkOopClosure _oop_cl; +public: + CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : + _g1h(G1CollectedHeap::heap()), + _bitMap(cm->nextMarkBitMap()), + _oop_cl(cm, ms_size) + {} + + ~CSMarkBitMapClosure() {} + + bool do_bit(size_t offset) { + // convert offset into a HeapWord* + HeapWord* addr = _bitMap->offsetToHeapWord(offset); + assert(_bitMap->endWord() && addr < _bitMap->endWord(), + "address out of range"); + assert(_bitMap->isMarked(addr), "tautology"); + oop obj = oop(addr); + if (!obj->is_forwarded()) { + if (!_oop_cl.push(obj)) return false; + if (!_oop_cl.drain()) return false; + } + // Otherwise... 
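+    // ...the object has already been successfully evacuated (regions that
+    // failed evacuation are skipped by the caller), so its forwarded copy,
+    // not this old image, is the one that matters and nothing needs doing here.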
+ return true; + } +}; + + +class CompleteMarkingInCSHRClosure: public HeapRegionClosure { + CMBitMap* _bm; + CSMarkBitMapClosure _bit_cl; + enum SomePrivateConstants { + MSSize = 1000 + }; + bool _completed; +public: + CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : + _bm(cm->nextMarkBitMap()), + _bit_cl(cm, MSSize), + _completed(true) + {} + + ~CompleteMarkingInCSHRClosure() {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->evacuation_failed()) { + MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); + if (!mr.is_empty()) { + if (!_bm->iterate(&_bit_cl, mr)) { + _completed = false; + return true; + } + } + } + return false; + } + + bool completed() { return _completed; } +}; + +class ClearMarksInHRClosure: public HeapRegionClosure { + CMBitMap* _bm; +public: + ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } + + bool doHeapRegion(HeapRegion* r) { + if (!r->used_region().is_empty() && !r->evacuation_failed()) { + MemRegion usedMR = r->used_region(); + _bm->clearRange(r->used_region()); + } + return false; + } +}; + +void ConcurrentMark::complete_marking_in_collection_set() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + if (!g1h->mark_in_progress()) { + g1h->g1_policy()->record_mark_closure_time(0.0); + return; + } + + int i = 1; + double start = os::elapsedTime(); + while (true) { + i++; + CompleteMarkingInCSHRClosure cmplt(this); + g1h->collection_set_iterate(&cmplt); + if (cmplt.completed()) break; + } + double end_time = os::elapsedTime(); + double elapsed_time_ms = (end_time - start) * 1000.0; + g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); + if (PrintGCDetails) { + gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms); + } + + ClearMarksInHRClosure clr(nextMarkBitMap()); + g1h->collection_set_iterate(&clr); +} + +// The next two methods deal with the following optimisation. Some +// objects are gray by being marked and located above the finger. If +// they are copied, during an evacuation pause, below the finger then +// the need to be pushed on the stack. The observation is that, if +// there are no regions in the collection set located above the +// finger, then the above cannot happen, hence we do not need to +// explicitly gray any objects when copying them to below the +// finger. The global stack will be scanned to ensure that, if it +// points to objects being copied, it will update their +// location. There is a tricky situation with the gray objects in +// region stack that are being coped, however. See the comment in +// newCSet(). + +void ConcurrentMark::newCSet() { + if (!concurrent_marking_in_progress()) + // nothing to do if marking is not in progress + return; + + // find what the lowest finger is among the global and local fingers + _min_finger = _finger; + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* task = _tasks[i]; + HeapWord* task_finger = task->finger(); + if (task_finger != NULL && task_finger < _min_finger) + _min_finger = task_finger; + } + + _should_gray_objects = false; + + // This fixes a very subtle and fustrating bug. It might be the case + // that, during en evacuation pause, heap regions that contain + // objects that are gray (by being in regions contained in the + // region stack) are included in the collection set. 
Since such gray + // objects will be moved, and because it's not easy to redirect + // region stack entries to point to a new location (because objects + // in one region might be scattered to multiple regions after they + // are copied), one option is to ensure that all marked objects + // copied during a pause are pushed on the stack. Notice, however, + // that this problem can only happen when the region stack is not + // empty during an evacuation pause. So, we make the fix a bit less + // conservative and ensure that regions are pushed on the stack, + // irrespective whether all collection set regions are below the + // finger, if the region stack is not empty. This is expected to be + // a rare case, so I don't think it's necessary to be smarted about it. + if (!region_stack_empty()) + _should_gray_objects = true; +} + +void ConcurrentMark::registerCSetRegion(HeapRegion* hr) { + if (!concurrent_marking_in_progress()) + return; + + HeapWord* region_end = hr->end(); + if (region_end > _min_finger) + _should_gray_objects = true; +} + +void ConcurrentMark::disable_co_trackers() { + if (has_aborted()) { + if (_cleanup_co_tracker.enabled()) + _cleanup_co_tracker.disable(); + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* task = _tasks[i]; + if (task->co_tracker_enabled()) + task->disable_co_tracker(); + } + } else { + guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); + for (int i = 0; i < (int)_max_task_num; ++i) { + CMTask* task = _tasks[i]; + guarantee( !task->co_tracker_enabled(), "invariant" ); + } + } +} + +// abandon current marking iteration due to a Full GC +void ConcurrentMark::abort() { + // If we're not marking, nothing to do. + if (!G1ConcMark) return; + + // Clear all marks to force marking thread to do nothing + _nextMarkBitMap->clearAll(); + // Empty mark stack + clear_marking_state(); + for (int i = 0; i < (int)_max_task_num; ++i) + _tasks[i]->clear_region_fields(); + _has_aborted = true; + + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + satb_mq_set.abandon_partial_marking(); + satb_mq_set.set_active_all_threads(false); +} + +static void print_ms_time_info(const char* prefix, const char* name, + NumberSeq& ns) { + gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", + prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); + if (ns.num() > 0) { + gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", + prefix, ns.sd(), ns.maximum()); + } +} + +void ConcurrentMark::print_summary_info() { + gclog_or_tty->print_cr(" Concurrent marking:"); + print_ms_time_info(" ", "init marks", _init_times); + print_ms_time_info(" ", "remarks", _remark_times); + { + print_ms_time_info(" ", "final marks", _remark_mark_times); + print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); + + } + print_ms_time_info(" ", "cleanups", _cleanup_times); + gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", + _total_counting_time, + (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + if (G1ScrubRemSets) { + gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", + _total_rs_scrub_time, + (_cleanup_times.num() > 0 ? 
_total_rs_scrub_time * 1000.0 / + (double)_cleanup_times.num() + : 0.0)); + } + gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", + (_init_times.sum() + _remark_times.sum() + + _cleanup_times.sum())/1000.0); + gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " + "(%8.2f s marking, %8.2f s counting).", + cmThread()->vtime_accum(), + cmThread()->vtime_mark_accum(), + cmThread()->vtime_count_accum()); +} + +// Closures +// XXX: there seems to be a lot of code duplication here; +// should refactor and consolidate the shared code. + +// This closure is used to mark refs into the CMS generation in +// the CMS bit map. Called at the first checkpoint. + +// We take a break if someone is trying to stop the world. +bool ConcurrentMark::do_yield_check(int worker_i) { + if (should_yield()) { + if (worker_i == 0) + _g1h->g1_policy()->record_concurrent_pause(); + cmThread()->yield(); + if (worker_i == 0) + _g1h->g1_policy()->record_concurrent_pause_end(); + return true; + } else { + return false; + } +} + +bool ConcurrentMark::should_yield() { + return cmThread()->should_yield(); +} + +bool ConcurrentMark::containing_card_is_marked(void* p) { + size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); + return _card_bm.at(offset >> CardTableModRefBS::card_shift); +} + +bool ConcurrentMark::containing_cards_are_marked(void* start, + void* last) { + return + containing_card_is_marked(start) && + containing_card_is_marked(last); +} + +#ifndef PRODUCT +// for debugging purposes +void ConcurrentMark::print_finger() { + gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, + _heap_start, _heap_end, _finger); + for (int i = 0; i < (int) _max_task_num; ++i) { + gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); + } + gclog_or_tty->print_cr(""); +} +#endif + +// Closure for iteration over bitmaps +class CMBitMapClosure : public BitMapClosure { +private: + // the bitmap that is being iterated over + CMBitMap* _nextMarkBitMap; + ConcurrentMark* _cm; + CMTask* _task; + // true if we're scanning a heap region claimed by the task (so that + // we move the finger along), false if we're not, i.e. currently when + // scanning a heap region popped from the region stack (so that we + // do not move the task finger along; it'd be a mistake if we did so). + bool _scanning_heap_region; + +public: + CMBitMapClosure(CMTask *task, + ConcurrentMark* cm, + CMBitMap* nextMarkBitMap) + : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } + + void set_scanning_heap_region(bool scanning_heap_region) { + _scanning_heap_region = scanning_heap_region; + } + + bool do_bit(size_t offset) { + HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); + tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" ); + tmp_guarantee_CM( addr < _cm->finger(), "invariant" ); + + if (_scanning_heap_region) { + statsOnly( _task->increase_objs_found_on_bitmap() ); + tmp_guarantee_CM( addr >= _task->finger(), "invariant" ); + // We move that task's local finger along. + _task->move_finger_to(addr); + } else { + // We move the task's region finger along. 
+ _task->move_region_finger_to(addr); + } + + _task->scan_object(oop(addr)); + // we only partially drain the local queue and global stack + _task->drain_local_queue(true); + _task->drain_global_stack(true); + + // if the has_aborted flag has been raised, we need to bail out of + // the iteration + return !_task->has_aborted(); + } +}; + +// Closure for iterating over objects, currently only used for +// processing SATB buffers. +class CMObjectClosure : public ObjectClosure { +private: + CMTask* _task; + +public: + void do_object(oop obj) { + _task->deal_with_reference(obj); + } + + CMObjectClosure(CMTask* task) : _task(task) { } +}; + +// Closure for iterating over object fields +class CMOopClosure : public OopClosure { +private: + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + CMTask* _task; + +public: + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" ); + + oop obj = *p; + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] we're looking at location " + "*"PTR_FORMAT" = "PTR_FORMAT, + _task->task_id(), p, (void*) obj); + _task->deal_with_reference(obj); + } + + CMOopClosure(G1CollectedHeap* g1h, + ConcurrentMark* cm, + CMTask* task) + : _g1h(g1h), _cm(cm), _task(task) { } +}; + +void CMTask::setup_for_region(HeapRegion* hr) { + tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(), + "claim_region() should have filtered out continues humongous regions" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, + _task_id, hr); + + _curr_region = hr; + _finger = hr->bottom(); + update_region_limit(); +} + +void CMTask::update_region_limit() { + HeapRegion* hr = _curr_region; + HeapWord* bottom = hr->bottom(); + HeapWord* limit = hr->next_top_at_mark_start(); + + if (limit == bottom) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] found an empty region " + "["PTR_FORMAT", "PTR_FORMAT")", + _task_id, bottom, limit); + // The region was collected underneath our feet. + // We set the finger to bottom to ensure that the bitmap + // iteration that will follow this will not do anything. + // (this is not a condition that holds when we set the region up, + // as the region is not supposed to be empty in the first place) + _finger = bottom; + } else if (limit >= _region_limit) { + tmp_guarantee_CM( limit >= _finger, "peace of mind" ); + } else { + tmp_guarantee_CM( limit < _region_limit, "only way to get here" ); + // This can happen under some pretty unusual circumstances. An + // evacuation pause empties the region underneath our feet (NTAMS + // at bottom). We then do some allocation in the region (NTAMS + // stays at bottom), followed by the region being used as a GC + // alloc region (NTAMS will move to top() and the objects + // originally below it will be grayed). All objects now marked in + // the region are explicitly grayed, if below the global finger, + // and we do not need in fact to scan anything else. So, we simply + // set _finger to be limit to ensure that the bitmap iteration + // doesn't do anything. 
+ _finger = limit; + } + + _region_limit = limit; +} + +void CMTask::giveup_current_region() { + tmp_guarantee_CM( _curr_region != NULL, "invariant" ); + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, + _task_id, _curr_region); + clear_region_fields(); +} + +void CMTask::clear_region_fields() { + // Values for these three fields that indicate that we're not + // holding on to a region. + _curr_region = NULL; + _finger = NULL; + _region_limit = NULL; + + _region_finger = NULL; +} + +void CMTask::reset(CMBitMap* nextMarkBitMap) { + guarantee( nextMarkBitMap != NULL, "invariant" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] resetting", _task_id); + + _nextMarkBitMap = nextMarkBitMap; + clear_region_fields(); + + _calls = 0; + _elapsed_time_ms = 0.0; + _termination_time_ms = 0.0; + _termination_start_time_ms = 0.0; + +#if _MARKING_STATS_ + _local_pushes = 0; + _local_pops = 0; + _local_max_size = 0; + _objs_scanned = 0; + _global_pushes = 0; + _global_pops = 0; + _global_max_size = 0; + _global_transfers_to = 0; + _global_transfers_from = 0; + _region_stack_pops = 0; + _regions_claimed = 0; + _objs_found_on_bitmap = 0; + _satb_buffers_processed = 0; + _steal_attempts = 0; + _steals = 0; + _aborted = 0; + _aborted_overflow = 0; + _aborted_cm_aborted = 0; + _aborted_yield = 0; + _aborted_timed_out = 0; + _aborted_satb = 0; + _aborted_termination = 0; +#endif // _MARKING_STATS_ +} + +bool CMTask::should_exit_termination() { + regular_clock_call(); + // This is called when we are in the termination protocol. We should + // quit if, for some reason, this task wants to abort or the global + // stack is not empty (this means that we can get work from it). + return !_cm->mark_stack_empty() || has_aborted(); +} + +// This determines whether the method below will check both the local +// and global fingers when determining whether to push on the stack a +// gray object (value 1) or whether it will only check the global one +// (value 0). The tradeoffs are that the former will be a bit more +// accurate and possibly push less on the stack, but it might also be +// a little bit slower. + +#define _CHECK_BOTH_FINGERS_ 1 + +void CMTask::deal_with_reference(oop obj) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT, + _task_id, (void*) obj); + + ++_refs_reached; + + HeapWord* objAddr = (HeapWord*) obj; + if (_g1h->is_in_g1_reserved(objAddr)) { + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); + HeapRegion* hr = _g1h->heap_region_containing(obj); + if (_g1h->is_obj_ill(obj, hr)) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked", + _task_id, (void*) obj); + + // we need to mark it first + if (_nextMarkBitMap->parMark(objAddr)) { + // No OrderAccess:store_load() is needed. It is implicit in the + // CAS done in parMark(objAddr) above + HeapWord* global_finger = _cm->finger(); + +#if _CHECK_BOTH_FINGERS_ + // we will check both the local and global fingers + + if (_finger != NULL && objAddr < _finger) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), " + "pushing it", _task_id, _finger); + push(obj); + } else if (_curr_region != NULL && objAddr < _region_limit) { + // do nothing + } else if (objAddr < global_finger) { + // Notice that the global finger might be moving forward + // concurrently. This is not a problem. 
In the worst case, we + // mark the object while it is above the global finger and, by + // the time we read the global finger, it has moved forward + // passed this object. In this case, the object will probably + // be visited when a task is scanning the region and will also + // be pushed on the stack. So, some duplicate work, but no + // correctness problems. + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the global finger " + "("PTR_FORMAT"), pushing it", + _task_id, global_finger); + push(obj); + } else { + // do nothing + } +#else // _CHECK_BOTH_FINGERS_ + // we will only check the global finger + + if (objAddr < global_finger) { + // see long comment above + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] below the global finger " + "("PTR_FORMAT"), pushing it", + _task_id, global_finger); + push(obj); + } +#endif // _CHECK_BOTH_FINGERS_ + } + } + } +} + +void CMTask::push(oop obj) { + HeapWord* objAddr = (HeapWord*) obj; + tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" ); + tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" ); + tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); + + if (!_task_queue->push(obj)) { + // The local task queue looks full. We need to push some entries + // to the global stack. + + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] task queue overflow, " + "moving entries to the global stack", + _task_id); + move_entries_to_global_stack(); + + // this should succeed since, even if we overflow the global + // stack, we should have definitely removed some entries from the + // local queue. So, there must be space on it. + bool success = _task_queue->push(obj); + tmp_guarantee_CM( success, "invariant" ); + } + + statsOnly( int tmp_size = _task_queue->size(); + if (tmp_size > _local_max_size) + _local_max_size = tmp_size; + ++_local_pushes ); +} + +void CMTask::reached_limit() { + tmp_guarantee_CM( _words_scanned >= _words_scanned_limit || + _refs_reached >= _refs_reached_limit , + "shouldn't have been called otherwise" ); + regular_clock_call(); +} + +void CMTask::regular_clock_call() { + if (has_aborted()) + return; + + // First, we need to recalculate the words scanned and refs reached + // limits for the next clock call. + recalculate_limits(); + + // During the regular clock call we do the following + + // (1) If an overflow has been flagged, then we abort. + if (_cm->has_overflown()) { + set_has_aborted(); + return; + } + + // If we are not concurrent (i.e. we're doing remark) we don't need + // to check anything else. The other steps are only needed during + // the concurrent marking phase. + if (!concurrent()) + return; + + // (2) If marking has been aborted for Full GC, then we also abort. + if (_cm->has_aborted()) { + set_has_aborted(); + statsOnly( ++_aborted_cm_aborted ); + return; + } + + double curr_time_ms = os::elapsedVTime() * 1000.0; + + // (3) If marking stats are enabled, then we update the step history. 
+#if _MARKING_STATS_ + if (_words_scanned >= _words_scanned_limit) + ++_clock_due_to_scanning; + if (_refs_reached >= _refs_reached_limit) + ++_clock_due_to_marking; + + double last_interval_ms = curr_time_ms - _interval_start_time_ms; + _interval_start_time_ms = curr_time_ms; + _all_clock_intervals_ms.add(last_interval_ms); + + if (_cm->verbose_medium()) { + gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " + "scanned = %d%s, refs reached = %d%s", + _task_id, last_interval_ms, + _words_scanned, + (_words_scanned >= _words_scanned_limit) ? " (*)" : "", + _refs_reached, + (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); + } +#endif // _MARKING_STATS_ + + // (4) We check whether we should yield. If we have to, then we abort. + if (_cm->should_yield()) { + // We should yield. To do this we abort the task. The caller is + // responsible for yielding. + set_has_aborted(); + statsOnly( ++_aborted_yield ); + return; + } + + // (5) We check whether we've reached our time quota. If we have, + // then we abort. + double elapsed_time_ms = curr_time_ms - _start_time_ms; + if (elapsed_time_ms > _time_target_ms) { + set_has_aborted(); + _has_aborted_timed_out = true; + statsOnly( ++_aborted_timed_out ); + return; + } + + // (6) Finally, we check whether there are enough completed STAB + // buffers available for processing. If there are, we abort. + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", + _task_id); + // we do need to process SATB buffers, we'll abort and restart + // the marking task to do so + set_has_aborted(); + statsOnly( ++_aborted_satb ); + return; + } +} + +void CMTask::recalculate_limits() { + _real_words_scanned_limit = _words_scanned + words_scanned_period; + _words_scanned_limit = _real_words_scanned_limit; + + _real_refs_reached_limit = _refs_reached + refs_reached_period; + _refs_reached_limit = _real_refs_reached_limit; +} + +void CMTask::decrease_limits() { + // This is called when we believe that we're going to do an infrequent + // operation which will increase the per byte scanned cost (i.e. move + // entries to/from the global stack). It basically tries to decrease the + // scanning limit so that the clock is called earlier. 
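+  // After this adjustment at most words_scanned_period / 4 words (and,
+  // similarly, at most refs_reached_period / 4 references) can be processed
+  // before reached_limit() triggers the next regular_clock_call().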
+ + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); + + _words_scanned_limit = _real_words_scanned_limit - + 3 * words_scanned_period / 4; + _refs_reached_limit = _real_refs_reached_limit - + 3 * refs_reached_period / 4; +} + +void CMTask::move_entries_to_global_stack() { + // local array where we'll store the entries that will be popped + // from the local queue + oop buffer[global_stack_transfer_size]; + + int n = 0; + oop obj; + while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { + buffer[n] = obj; + ++n; + } + + if (n > 0) { + // we popped at least one entry from the local queue + + statsOnly( ++_global_transfers_to; _local_pops += n ); + + if (!_cm->mark_stack_push(buffer, n)) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); + set_has_aborted(); + } else { + // the transfer was successful + + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", + _task_id, n); + statsOnly( int tmp_size = _cm->mark_stack_size(); + if (tmp_size > _global_max_size) + _global_max_size = tmp_size; + _global_pushes += n ); + } + } + + // this operation was quite expensive, so decrease the limits + decrease_limits(); +} + +void CMTask::get_entries_from_global_stack() { + // local array where we'll store the entries that will be popped + // from the global stack. + oop buffer[global_stack_transfer_size]; + int n; + _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); + tmp_guarantee_CM( n <= global_stack_transfer_size, + "we should not pop more than the given limit" ); + if (n > 0) { + // yes, we did actually pop at least one entry + + statsOnly( ++_global_transfers_from; _global_pops += n ); + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", + _task_id, n); + for (int i = 0; i < n; ++i) { + bool success = _task_queue->push(buffer[i]); + // We only call this when the local queue is empty or under a + // given target limit. So, we do not expect this push to fail. + tmp_guarantee_CM( success, "invariant" ); + } + + statsOnly( int tmp_size = _task_queue->size(); + if (tmp_size > _local_max_size) + _local_max_size = tmp_size; + _local_pushes += n ); + } + + // this operation was quite expensive, so decrease the limits + decrease_limits(); +} + +void CMTask::drain_local_queue(bool partially) { + if (has_aborted()) + return; + + // Decide what the target size is, depending whether we're going to + // drain it partially (so that other tasks can steal if they run out + // of things to do) or totally (at the very end). 
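+  // (For instance, with an assumed queue capacity of 16K entries and an
+  // assumed GCDrainStackTargetSize of 64, the partial target is
+  // MIN2(16384 / 3, 64) == 64: the queue is drained down to roughly 64
+  // entries, which remain available for stealing by other tasks.)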
+ size_t target_size; + if (partially) + target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); + else + target_size = 0; + + if (_task_queue->size() > target_size) { + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", + _task_id, target_size); + + oop obj; + bool ret = _task_queue->pop_local(obj); + while (ret) { + statsOnly( ++_local_pops ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, + (void*) obj); + + tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj), + "invariant" ); + + scan_object(obj); + + if (_task_queue->size() <= target_size || has_aborted()) + ret = false; + else + ret = _task_queue->pop_local(obj); + } + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] drained local queue, size = %d", + _task_id, _task_queue->size()); + } +} + +void CMTask::drain_global_stack(bool partially) { + if (has_aborted()) + return; + + // We have a policy to drain the local queue before we attempt to + // drain the global stack. + tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" ); + + // Decide what the target size is, depending whether we're going to + // drain it partially (so that other tasks can steal if they run out + // of things to do) or totally (at the very end). Notice that, + // because we move entries from the global stack in chunks or + // because another task might be doing the same, we might in fact + // drop below the target. But, this is not a problem. + size_t target_size; + if (partially) + target_size = _cm->partial_mark_stack_size_target(); + else + target_size = 0; + + if (_cm->mark_stack_size() > target_size) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", + _task_id, target_size); + + while (!has_aborted() && _cm->mark_stack_size() > target_size) { + get_entries_from_global_stack(); + drain_local_queue(partially); + } + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] drained global stack, size = %d", + _task_id, _cm->mark_stack_size()); + } +} + +// SATB Queue has several assumptions on whether to call the par or +// non-par versions of the methods. this is why some of the code is +// replicated. We should really get rid of the single-threaded version +// of the code to simplify things. +void CMTask::drain_satb_buffers() { + if (has_aborted()) + return; + + // We set this so that the regular clock knows that we're in the + // middle of draining buffers and doesn't set the abort flag when it + // notices that SATB buffers are available for draining. It'd be + // very counter productive if it did that. :-) + _draining_satb_buffers = true; + + CMObjectClosure oc(this); + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); + if (ParallelGCThreads > 0) + satb_mq_set.set_par_closure(_task_id, &oc); + else + satb_mq_set.set_closure(&oc); + + // This keeps claiming and applying the closure to completed buffers + // until we run out of buffers or we need to abort. 
+ if (ParallelGCThreads > 0) { + while (!has_aborted() && + satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); + statsOnly( ++_satb_buffers_processed ); + regular_clock_call(); + } + } else { + while (!has_aborted() && + satb_mq_set.apply_closure_to_completed_buffer()) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); + statsOnly( ++_satb_buffers_processed ); + regular_clock_call(); + } + } + + if (!concurrent() && !has_aborted()) { + // We should only do this during remark. + if (ParallelGCThreads > 0) + satb_mq_set.par_iterate_closure_all_threads(_task_id); + else + satb_mq_set.iterate_closure_all_threads(); + } + + _draining_satb_buffers = false; + + tmp_guarantee_CM( has_aborted() || + concurrent() || + satb_mq_set.completed_buffers_num() == 0, "invariant" ); + + if (ParallelGCThreads > 0) + satb_mq_set.set_par_closure(_task_id, NULL); + else + satb_mq_set.set_closure(NULL); + + // again, this was a potentially expensive operation, decrease the + // limits to get the regular clock call early + decrease_limits(); +} + +void CMTask::drain_region_stack(BitMapClosure* bc) { + if (has_aborted()) + return; + + tmp_guarantee_CM( _region_finger == NULL, + "it should be NULL when we're not scanning a region" ); + + if (!_cm->region_stack_empty()) { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] draining region stack, size = %d", + _task_id, _cm->region_stack_size()); + + MemRegion mr = _cm->region_stack_pop(); + // it returns MemRegion() if the pop fails + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + + while (mr.start() != NULL) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] we are scanning region " + "["PTR_FORMAT", "PTR_FORMAT")", + _task_id, mr.start(), mr.end()); + tmp_guarantee_CM( mr.end() <= _cm->finger(), + "otherwise the region shouldn't be on the stack" ); + assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); + if (_nextMarkBitMap->iterate(bc, mr)) { + tmp_guarantee_CM( !has_aborted(), + "cannot abort the task without aborting the bitmap iteration" ); + + // We finished iterating over the region without aborting. + regular_clock_call(); + if (has_aborted()) + mr = MemRegion(); + else { + mr = _cm->region_stack_pop(); + // it returns MemRegion() if the pop fails + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); + } + } else { + guarantee( has_aborted(), "currently the only way to do so" ); + + // The only way to abort the bitmap iteration is to return + // false from the do_bit() method. However, inside the + // do_bit() method we move the _region_finger to point to the + // object currently being looked at. So, if we bail out, we + // have definitely set _region_finger to something non-null. + guarantee( _region_finger != NULL, "invariant" ); + + // The iteration was actually aborted. So now _region_finger + // points to the address of the object we last scanned. If we + // leave it there, when we restart this task, we will rescan + // the object. It is easy to avoid this. We move the finger by + // enough to point to the next possible object header (the + // bitmap knows by how much we need to move it as it knows its + // granularity). 
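+      // (Illustration with made-up addresses: if the popped region was
+      // [0x1000, 0x4000) and the iteration aborted with _region_finger at
+      // 0x2000, the unscanned remainder [nextWord(0x2000), 0x4000) is pushed
+      // back below so that some task eventually finishes it.)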
+ MemRegion newRegion = + MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); + + if (!newRegion.is_empty()) { + if (_cm->verbose_low()) { + gclog_or_tty->print_cr("[%d] pushing unscanned region" + "[" PTR_FORMAT "," PTR_FORMAT ") on region stack", + _task_id, + newRegion.start(), newRegion.end()); + } + // Now push the part of the region we didn't scan on the + // region stack to make sure a task scans it later. + _cm->region_stack_push(newRegion); + } + // break from while + mr = MemRegion(); + } + _region_finger = NULL; + } + + // We only push regions on the region stack during evacuation + // pauses. So if we come out the above iteration because we region + // stack is empty, it will remain empty until the next yield + // point. So, the guarantee below is safe. + guarantee( has_aborted() || _cm->region_stack_empty(), + "only way to exit the loop" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] drained region stack, size = %d", + _task_id, _cm->region_stack_size()); + } +} + +void CMTask::print_stats() { + gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", + _task_id, _calls); + gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", + _elapsed_time_ms, _termination_time_ms); + gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", + _step_times_ms.num(), _step_times_ms.avg(), + _step_times_ms.sd()); + gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", + _step_times_ms.maximum(), _step_times_ms.sum()); + +#if _MARKING_STATS_ + gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", + _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), + _all_clock_intervals_ms.sd()); + gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", + _all_clock_intervals_ms.maximum(), + _all_clock_intervals_ms.sum()); + gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", + _clock_due_to_scanning, _clock_due_to_marking); + gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", + _objs_scanned, _objs_found_on_bitmap); + gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", + _local_pushes, _local_pops, _local_max_size); + gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", + _global_pushes, _global_pops, _global_max_size); + gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", + _global_transfers_to,_global_transfers_from); + gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", + _regions_claimed, _region_stack_pops); + gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); + gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", + _steal_attempts, _steals); + gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); + gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", + _aborted_overflow, _aborted_cm_aborted, _aborted_yield); + gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", + _aborted_timed_out, _aborted_satb, _aborted_termination); +#endif // _MARKING_STATS_ +} + +/***************************************************************************** + + The do_marking_step(time_target_ms) method is the building block + of the parallel marking framework. 
It can be called in parallel
+ with other invocations of do_marking_step() on different tasks
+ (but only one per task, obviously) and concurrently with the
+ mutator threads, or during remark, hence it eliminates the need
+ for two versions of the code. When called during remark, it will
+ pick up from where the task left off during the concurrent marking
+ phase. Interestingly, tasks are also claimable during evacuation
+ pauses, since do_marking_step() ensures that it aborts before
+ it needs to yield.
+
+ The data structures that it uses to do marking work are the
+ following:
+
+ (1) Marking Bitmap. If there are gray objects that appear only
+ on the bitmap (this happens either when dealing with an overflow
+ or when the initial marking phase has simply marked the roots
+ and didn't push them on the stack), then tasks claim heap
+ regions whose bitmap they then scan to find gray objects. A
+ global finger indicates where the end of the last claimed region
+ is. A local finger indicates how far into the region a task has
+ scanned. The two fingers are used to determine how to gray an
+ object (i.e. whether simply marking it is OK, as it will be
+ visited by a task in the future, or whether it also needs to be
+ pushed on a stack).
+
+ (2) Local Queue. The local queue of the task, which is accessed
+ reasonably efficiently by the task. Other tasks can steal from
+ it when they run out of work. Throughout the marking phase, a
+ task attempts to keep its local queue short but not totally
+ empty, so that entries are available for stealing by other
+ tasks. Only when there is no more work will a task totally
+ drain its local queue.
+
+ (3) Global Mark Stack. This handles local queue overflow. During
+ marking only sets of entries are moved between it and the local
+ queues, as access to it requires a mutex and more fine-grained
+ interaction with it which might cause contention. If it
+ overflows, then the marking phase should restart and iterate
+ over the bitmap to identify gray objects. Throughout the marking
+ phase, tasks attempt to keep the global mark stack at a small
+ length but not totally empty, so that entries are available for
+ popping by other tasks. Only when there is no more work will tasks
+ totally drain the global mark stack.
+
+ (4) Global Region Stack. Entries on it correspond to areas of
+ the bitmap that need to be scanned since they contain gray
+ objects. Pushes on the region stack only happen during
+ evacuation pauses and typically correspond to areas covered by
+ GC LABs. If it overflows, then the marking phase should restart
+ and iterate over the bitmap to identify gray objects. Tasks will
+ try to totally drain the region stack as soon as possible.
+
+ (5) SATB Buffer Queue. This is where completed SATB buffers are
+ made available. Buffers are regularly removed from this queue
+ and scanned for roots, so that the queue doesn't get too
+ long. During remark, all completed buffers are processed, as
+ well as the filled-in parts of any uncompleted buffers.
+
+ The do_marking_step() method tries to abort when the time target
+ has been reached. There are a few other cases when the
+ do_marking_step() method also aborts:
+
+ (1) When the marking phase has been aborted (after a Full GC).
+
+ (2) When a global overflow (either on the global stack or the
+ region stack) has been triggered. Before the task aborts, it
+ will actually sync up with the other tasks to ensure that all
+ the marking data structures (local queues, stacks, fingers etc.)
+ are re-initialised so that when do_marking_step() completes,
+ the marking phase can immediately restart.
+
+ (3) When enough completed SATB buffers are available. The
+ do_marking_step() method only tries to drain SATB buffers right
+ at the beginning. So, if enough buffers are available, the
+ marking step aborts and the SATB buffers are processed at
+ the beginning of the next invocation.
+
+ (4) To yield. When we have to yield, we abort and do the yield
+ right at the end of do_marking_step(). This saves us a lot
+ of hassle as, by yielding, we might allow a Full GC. If this
+ happens then objects will be compacted underneath our feet, the
+ heap might shrink, etc. We save checking for this by just
+ aborting and doing the yield right at the end.
+
+ From the above it follows that the do_marking_step() method should
+ be called in a loop (or, otherwise, regularly) until it completes.
+
+ If a marking step completes without its has_aborted() flag being
+ true, it means it has completed the current marking phase (and
+ also all other marking tasks have done so and have all synced up).
+
+ A method called regular_clock_call() is invoked "regularly" (at
+ sub-ms intervals) throughout marking. It is this clock method that
+ checks all the abort conditions which were mentioned above and
+ decides when the task should abort. A work-based scheme is used to
+ trigger this clock method: it is called whenever the number of object
+ words the marking phase has scanned, or the number of references the
+ marking phase has visited, reaches a given limit. Additional invocations
+ of the clock method have been planted in a few other strategic places
+ too. The initial reason for the clock method was to avoid calling
+ vtime too regularly, as it is quite expensive. So, once it was in
+ place, it was natural to piggy-back all the other conditions on it
+ too and not constantly check them throughout the code.
+
+ *****************************************************************************/
+
+void CMTask::do_marking_step(double time_target_ms) {
+  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
+  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
+
+  guarantee( concurrent() || _cm->region_stack_empty(),
+             "the region stack should have been cleared before remark" );
+  guarantee( _region_finger == NULL,
+             "this should be non-null only when a region is being scanned" );
+
+  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
+  guarantee( _task_queues != NULL, "invariant" );
+  guarantee( _task_queue != NULL, "invariant" );
+  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
+
+  guarantee( !_claimed,
+             "only one thread should claim this task at any one time" );
+
+  // OK, this doesn't safeguard against all possible scenarios, as it is
+  // possible for two threads to set the _claimed flag at the same
+  // time. But it is only for debugging purposes anyway and it will
+  // catch most problems.
+ _claimed = true; + + _start_time_ms = os::elapsedVTime() * 1000.0; + statsOnly( _interval_start_time_ms = _start_time_ms ); + + double diff_prediction_ms = + g1_policy->get_new_prediction(&_marking_step_diffs_ms); + _time_target_ms = time_target_ms - diff_prediction_ms; + + // set up the variables that are used in the work-based scheme to + // call the regular clock method + _words_scanned = 0; + _refs_reached = 0; + recalculate_limits(); + + // clear all flags + clear_has_aborted(); + _has_aborted_timed_out = false; + _draining_satb_buffers = false; + + ++_calls; + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " + "target = %1.2lfms >>>>>>>>>>", + _task_id, _calls, _time_target_ms); + + // Set up the bitmap and oop closures. Anything that uses them is + // eventually called from this method, so it is OK to allocate these + // statically. + CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); + CMOopClosure oop_closure(_g1h, _cm, this); + set_oop_closure(&oop_closure); + + if (_cm->has_overflown()) { + // This can happen if the region stack or the mark stack overflows + // during a GC pause and this task, after a yield point, + // restarts. We have to abort as we need to get into the overflow + // protocol which happens right at the end of this task. + set_has_aborted(); + } + + // First drain any available SATB buffers. After this, we will not + // look at SATB buffers before the next invocation of this method. + // If enough completed SATB buffers are queued up, the regular clock + // will abort this task so that it restarts. + drain_satb_buffers(); + // ...then partially drain the local queue and the global stack + drain_local_queue(true); + drain_global_stack(true); + + // Then totally drain the region stack. We will not look at + // it again before the next invocation of this method. Entries on + // the region stack are only added during evacuation pauses, for + // which we have to yield. When we do, we abort the task anyway so + // it will look at the region stack again when it restarts. + bitmap_closure.set_scanning_heap_region(false); + drain_region_stack(&bitmap_closure); + // ...then partially drain the local queue and the global stack + drain_local_queue(true); + drain_global_stack(true); + + do { + if (!has_aborted() && _curr_region != NULL) { + // This means that we're already holding on to a region. + tmp_guarantee_CM( _finger != NULL, + "if region is not NULL, then the finger " + "should not be NULL either" ); + + // We might have restarted this task after an evacuation pause + // which might have evacuated the region we're holding on to + // underneath our feet. Let's read its limit again to make sure + // that we do not iterate over a region of the heap that + // contains garbage (update_region_limit() will also move + // _finger to the start of the region if it is found empty). + update_region_limit(); + // We will start from _finger not from the start of the region, + // as we might be restarting this task after aborting half-way + // through scanning this region. In this case, _finger points to + // the address where we last found a marked object. If this is a + // fresh region, _finger points to start(). 
+ MemRegion mr = MemRegion(_finger, _region_limit); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] we're scanning part " + "["PTR_FORMAT", "PTR_FORMAT") " + "of region "PTR_FORMAT, + _task_id, _finger, _region_limit, _curr_region); + + // Let's iterate over the bitmap of the part of the + // region that is left. + bitmap_closure.set_scanning_heap_region(true); + if (mr.is_empty() || + _nextMarkBitMap->iterate(&bitmap_closure, mr)) { + // We successfully completed iterating over the region. Now, + // let's give up the region. + giveup_current_region(); + regular_clock_call(); + } else { + guarantee( has_aborted(), "currently the only way to do so" ); + // The only way to abort the bitmap iteration is to return + // false from the do_bit() method. However, inside the + // do_bit() method we move the _finger to point to the + // object currently being looked at. So, if we bail out, we + // have definitely set _finger to something non-null. + guarantee( _finger != NULL, "invariant" ); + + // Region iteration was actually aborted. So now _finger + // points to the address of the object we last scanned. If we + // leave it there, when we restart this task, we will rescan + // the object. It is easy to avoid this. We move the finger by + // enough to point to the next possible object header (the + // bitmap knows by how much we need to move it as it knows its + // granularity). + move_finger_to(_nextMarkBitMap->nextWord(_finger)); + } + } + // At this point we have either completed iterating over the + // region we were holding on to, or we have aborted. + + // We then partially drain the local queue and the global stack. + // (Do we really need this?) + drain_local_queue(true); + drain_global_stack(true); + + // Read the note on the claim_region() method on why it might + // return NULL with potentially more regions available for + // claiming and why we have to check out_of_regions() to determine + // whether we're done or not. + while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { + // We are going to try to claim a new region. We should have + // given up on the previous one. + tmp_guarantee_CM( _curr_region == NULL && + _finger == NULL && + _region_limit == NULL, "invariant" ); + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); + HeapRegion* claimed_region = _cm->claim_region(_task_id); + if (claimed_region != NULL) { + // Yes, we managed to claim one + statsOnly( ++_regions_claimed ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] we successfully claimed " + "region "PTR_FORMAT, + _task_id, claimed_region); + + setup_for_region(claimed_region); + tmp_guarantee_CM( _curr_region == claimed_region, "invariant" ); + } + // It is important to call the regular clock here. It might take + // a while to claim a region if, for example, we hit a large + // block of empty regions. So we need to call the regular clock + // method once round the loop to make sure it's called + // frequently enough. + regular_clock_call(); + } + + if (!has_aborted() && _curr_region == NULL) { + tmp_guarantee_CM( _cm->out_of_regions(), + "at this point we should be out of regions" ); + } + } while ( _curr_region != NULL && !has_aborted()); + + if (!has_aborted()) { + // We cannot check whether the global stack is empty, since other + // tasks might be pushing objects to it concurrently. 
+ tmp_guarantee_CM( _cm->out_of_regions() && _cm->region_stack_empty(), + "at this point we should be out of regions" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); + + // Try to reduce the number of available SATB buffers so that + // remark has less work to do. + drain_satb_buffers(); + } + + // Since we've done everything else, we can now totally drain the + // local queue and global stack. + drain_local_queue(false); + drain_global_stack(false); + + // Attempt at work stealing from other task's queues. + if (!has_aborted()) { + // We have not aborted. This means that we have finished all that + // we could. Let's try to do some stealing... + + // We cannot check whether the global stack is empty, since other + // tasks might be pushing objects to it concurrently. + guarantee( _cm->out_of_regions() && + _cm->region_stack_empty() && + _task_queue->size() == 0, "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] starting to steal", _task_id); + + while (!has_aborted()) { + oop obj; + statsOnly( ++_steal_attempts ); + + if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { + if (_cm->verbose_medium()) + gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", + _task_id, (void*) obj); + + statsOnly( ++_steals ); + + tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj), + "any stolen object should be marked" ); + scan_object(obj); + + // And since we're towards the end, let's totally drain the + // local queue and global stack. + drain_local_queue(false); + drain_global_stack(false); + } else { + break; + } + } + } + + // We still haven't aborted. Now, let's try to get into the + // termination protocol. + if (!has_aborted()) { + // We cannot check whether the global stack is empty, since other + // tasks might be concurrently pushing objects on it. + guarantee( _cm->out_of_regions() && + _cm->region_stack_empty() && + _task_queue->size() == 0, "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); + + _termination_start_time_ms = os::elapsedVTime() * 1000.0; + // The CMTask class also extends the TerminatorTerminator class, + // hence its should_exit_termination() method will also decide + // whether to exit the termination protocol or not. + bool finished = _cm->terminator()->offer_termination(this); + double termination_end_time_ms = os::elapsedVTime() * 1000.0; + _termination_time_ms += + termination_end_time_ms - _termination_start_time_ms; + + if (finished) { + // We're all done. + + if (_task_id == 0) { + // let's allow task 0 to do this + if (concurrent()) { + guarantee( _cm->concurrent_marking_in_progress(), "invariant" ); + // we need to set this to false before the next + // safepoint. This way we ensure that the marking phase + // doesn't observe any more heap expansions. + _cm->clear_concurrent_marking_in_progress(); + } + } + + // We can now guarantee that the global stack is empty, since + // all other tasks have finished. + guarantee( _cm->out_of_regions() && + _cm->region_stack_empty() && + _cm->mark_stack_empty() && + _task_queue->size() == 0 && + !_cm->has_overflown() && + !_cm->mark_stack_overflow() && + !_cm->region_stack_overflow(), + "only way to reach here" ); + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); + } else { + // Apparently there's more work to do. Let's abort this task. It + // will restart it and we can hopefully find more things to do. 
+ + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id); + + set_has_aborted(); + statsOnly( ++_aborted_termination ); + } + } + + // Mainly for debugging purposes to make sure that a pointer to the + // closure which was statically allocated in this frame doesn't + // escape it by accident. + set_oop_closure(NULL); + double end_time_ms = os::elapsedVTime() * 1000.0; + double elapsed_time_ms = end_time_ms - _start_time_ms; + // Update the step history. + _step_times_ms.add(elapsed_time_ms); + + if (has_aborted()) { + // The task was aborted for some reason. + + statsOnly( ++_aborted ); + + if (_has_aborted_timed_out) { + double diff_ms = elapsed_time_ms - _time_target_ms; + // Keep statistics of how well we did with respect to hitting + // our target only if we actually timed out (if we aborted for + // other reasons, then the results might get skewed). + _marking_step_diffs_ms.add(diff_ms); + } + + if (_cm->has_overflown()) { + // This is the interesting one. We aborted because a global + // overflow was raised. This means we have to restart the + // marking phase and start iterating over regions. However, in + // order to do this we have to make sure that all tasks stop + // what they are doing and re-initialise in a safe manner. We + // will achieve this with the use of two barrier sync points. + + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] detected overflow", _task_id); + + _cm->enter_first_sync_barrier(_task_id); + // When we exit this sync barrier we know that all tasks have + // stopped doing marking work. So, it's now safe to + // re-initialise our data structures. At the end of this method, + // task 0 will clear the global data structures. + + statsOnly( ++_aborted_overflow ); + + // We clear the local state of this task... + clear_region_fields(); + + // ...and enter the second barrier. + _cm->enter_second_sync_barrier(_task_id); + // At this point everything has bee re-initialised and we're + // ready to restart. + } + + if (_cm->verbose_low()) { + gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " + "elapsed = %1.2lfms <<<<<<<<<<", + _task_id, _time_target_ms, elapsed_time_ms); + if (_cm->has_aborted()) + gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", + _task_id); + } + } else { + if (_cm->verbose_low()) + gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " + "elapsed = %1.2lfms <<<<<<<<<<", + _task_id, _time_target_ms, elapsed_time_ms); + } + + _claimed = false; +} + +CMTask::CMTask(int task_id, + ConcurrentMark* cm, + CMTaskQueue* task_queue, + CMTaskQueueSet* task_queues) + : _g1h(G1CollectedHeap::heap()), + _co_tracker(G1CMGroup), + _task_id(task_id), _cm(cm), + _claimed(false), + _nextMarkBitMap(NULL), _hash_seed(17), + _task_queue(task_queue), + _task_queues(task_queues), + _oop_closure(NULL) { + guarantee( task_queue != NULL, "invariant" ); + guarantee( task_queues != NULL, "invariant" ); + + statsOnly( _clock_due_to_scanning = 0; + _clock_due_to_marking = 0 ); + + _marking_step_diffs_ms.add(0.5); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMark.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1049 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class G1CollectedHeap; +class CMTask; +typedef GenericTaskQueue CMTaskQueue; +typedef GenericTaskQueueSet CMTaskQueueSet; + +// A generic CM bit map. This is essentially a wrapper around the BitMap +// class, with one bit per (1<<_shifter) HeapWords. + +class CMBitMapRO { + protected: + HeapWord* _bmStartWord; // base address of range covered by map + size_t _bmWordSize; // map size (in #HeapWords covered) + const int _shifter; // map to char or bit + VirtualSpace _virtual_space; // underlying the bit map + BitMap _bm; // the bit map itself + + public: + // constructor + CMBitMapRO(ReservedSpace rs, int shifter); + + enum { do_yield = true }; + + // inquiries + HeapWord* startWord() const { return _bmStartWord; } + size_t sizeInWords() const { return _bmWordSize; } + // the following is one past the last word in space + HeapWord* endWord() const { return _bmStartWord + _bmWordSize; } + + // read marks + + bool isMarked(HeapWord* addr) const { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + return _bm.at(heapWordToOffset(addr)); + } + + // iteration + bool iterate(BitMapClosure* cl) { return _bm.iterate(cl); } + bool iterate(BitMapClosure* cl, MemRegion mr); + + // Return the address corresponding to the next marked bit at or after + // "addr", and before "limit", if "limit" is non-NULL. If there is no + // such bit, returns "limit" if that is non-NULL, or else "endWord()". + HeapWord* getNextMarkedWordAddress(HeapWord* addr, + HeapWord* limit = NULL) const; + // Return the address corresponding to the next unmarked bit at or after + // "addr", and before "limit", if "limit" is non-NULL. If there is no + // such bit, returns "limit" if that is non-NULL, or else "endWord()". + HeapWord* getNextUnmarkedWordAddress(HeapWord* addr, + HeapWord* limit = NULL) const; + + // conversion utilities + // XXX Fix these so that offsets are size_t's... 
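+  // (Illustration, assuming _shifter == 0, i.e. one bit per HeapWord:
+  //  heapWordToOffset(_bmStartWord + 17) == 17 and
+  //  offsetToHeapWord(17) == _bmStartWord + 17; with _shifter == 1 each
+  //  bit would cover two HeapWords instead.)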
+ HeapWord* offsetToHeapWord(size_t offset) const { + return _bmStartWord + (offset << _shifter); + } + size_t heapWordToOffset(HeapWord* addr) const { + return pointer_delta(addr, _bmStartWord) >> _shifter; + } + int heapWordDiffToOffsetDiff(size_t diff) const; + HeapWord* nextWord(HeapWord* addr) { + return offsetToHeapWord(heapWordToOffset(addr) + 1); + } + + void mostly_disjoint_range_union(BitMap* from_bitmap, + size_t from_start_index, + HeapWord* to_start_word, + size_t word_num); + + // debugging + NOT_PRODUCT(bool covers(ReservedSpace rs) const;) +}; + +class CMBitMap : public CMBitMapRO { + + public: + // constructor + CMBitMap(ReservedSpace rs, int shifter) : + CMBitMapRO(rs, shifter) {} + + // write marks + void mark(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + _bm.at_put(heapWordToOffset(addr), true); + } + void clear(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + _bm.at_put(heapWordToOffset(addr), false); + } + bool parMark(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + return _bm.par_at_put(heapWordToOffset(addr), true); + } + bool parClear(HeapWord* addr) { + assert(_bmStartWord <= addr && addr < (_bmStartWord + _bmWordSize), + "outside underlying space?"); + return _bm.par_at_put(heapWordToOffset(addr), false); + } + void markRange(MemRegion mr); + void clearAll(); + void clearRange(MemRegion mr); + + // Starting at the bit corresponding to "addr" (inclusive), find the next + // "1" bit, if any. This bit starts some run of consecutive "1"'s; find + // the end of this run (stopping at "end_addr"). Return the MemRegion + // covering from the start of the region corresponding to the first bit + // of the run to the end of the region corresponding to the last bit of + // the run. If there is no "1" bit at or after "addr", return an empty + // MemRegion. + MemRegion getAndClearMarkedRegion(HeapWord* addr, HeapWord* end_addr); +}; + +// Represents a marking stack used by the CM collector. +// Ideally this should be GrowableArray<> just like MSC's marking stack(s). +class CMMarkStack { + ConcurrentMark* _cm; + oop* _base; // bottom of stack + jint _index; // one more than last occupied index + jint _capacity; // max #elements + jint _oops_do_bound; // Number of elements to include in next iteration. + NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run + + bool _overflow; + DEBUG_ONLY(bool _drain_in_progress;) + DEBUG_ONLY(bool _drain_in_progress_yields;) + + public: + CMMarkStack(ConcurrentMark* cm); + ~CMMarkStack(); + + void allocate(size_t size); + + oop pop() { + if (!isEmpty()) { + return _base[--_index] ; + } + return NULL; + } + + // If overflow happens, don't do the push, and record the overflow. + // *Requires* that "ptr" is already marked. + void push(oop ptr) { + if (isFull()) { + // Record overflow. + _overflow = true; + return; + } else { + _base[_index++] = ptr; + NOT_PRODUCT(_max_depth = MAX2(_max_depth, _index)); + } + } + // Non-block impl. Note: concurrency is allowed only with other + // "par_push" operations, not with "pop" or "drain". We would need + // parallel versions of them if such concurrency was desired. + void par_push(oop ptr); + + // Pushes the first "n" elements of "ptr_arr" on the stack. + // Non-block impl. 
Note: concurrency is allowed only with other + // "par_adjoin_arr" or "push" operations, not with "pop" or "drain". + void par_adjoin_arr(oop* ptr_arr, int n); + + // Pushes the first "n" elements of "ptr_arr" on the stack. + // Locking impl: concurrency is allowed only with + // "par_push_arr" and/or "par_pop_arr" operations, which use the same + // locking strategy. + void par_push_arr(oop* ptr_arr, int n); + + // If returns false, the array was empty. Otherwise, removes up to "max" + // elements from the stack, and transfers them to "ptr_arr" in an + // unspecified order. The actual number transferred is given in "n" ("n + // == 0" is deliberately redundant with the return value.) Locking impl: + // concurrency is allowed only with "par_push_arr" and/or "par_pop_arr" + // operations, which use the same locking strategy. + bool par_pop_arr(oop* ptr_arr, int max, int* n); + + // Drain the mark stack, applying the given closure to all fields of + // objects on the stack. (That is, continue until the stack is empty, + // even if closure applications add entries to the stack.) The "bm" + // argument, if non-null, may be used to verify that only marked objects + // are on the mark stack. If "yield_after" is "true", then the + // concurrent marker performing the drain offers to yield after + // processing each object. If a yield occurs, stops the drain operation + // and returns false. Otherwise, returns true. + template + bool drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after = false); + + bool isEmpty() { return _index == 0; } + bool isFull() { return _index == _capacity; } + int maxElems() { return _capacity; } + + bool overflow() { return _overflow; } + void clear_overflow() { _overflow = false; } + + int size() { return _index; } + + void setEmpty() { _index = 0; clear_overflow(); } + + // Record the current size; a subsequent "oops_do" will iterate only over + // indices valid at the time of this call. + void set_oops_do_bound(jint bound = -1) { + if (bound == -1) { + _oops_do_bound = _index; + } else { + _oops_do_bound = bound; + } + } + jint oops_do_bound() { return _oops_do_bound; } + // iterate over the oops in the mark stack, up to the bound recorded via + // the call above. + void oops_do(OopClosure* f); +}; + +class CMRegionStack { + MemRegion* _base; + jint _capacity; + jint _index; + jint _oops_do_bound; + bool _overflow; +public: + CMRegionStack(); + ~CMRegionStack(); + void allocate(size_t size); + + // This is lock-free; assumes that it will only be called in parallel + // with other "push" operations (no pops). + void push(MemRegion mr); + + // Lock-free; assumes that it will only be called in parallel + // with other "pop" operations (no pushes). + MemRegion pop(); + + bool isEmpty() { return _index == 0; } + bool isFull() { return _index == _capacity; } + + bool overflow() { return _overflow; } + void clear_overflow() { _overflow = false; } + + int size() { return _index; } + + // It iterates over the entries in the region stack and it + // invalidates (i.e. assigns MemRegion()) the ones that point to + // regions in the collection set. + bool invalidate_entries_into_cset(); + + // This gives an upper bound up to which the iteration in + // invalidate_entries_into_cset() will reach. This prevents + // newly-added entries to be unnecessarily scanned. 
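The set_oops_do_bound() idea used by both the mark stack and the region stack is: snapshot the current index, and have a later iteration visit only entries that existed at snapshot time, ignoring anything pushed afterwards. A minimal sketch of that bounded-iteration pattern, with a std::vector standing in for the stack and all names invented for the sketch:

#include <cstdio>
#include <vector>

struct BoundedStack {
  std::vector<int> elems;
  std::size_t bound = 0;

  void push(int v) { elems.push_back(v); }

  // Record the current size; a later for_each_bounded() ignores newer pushes.
  void set_bound() { bound = elems.size(); }

  template <typename F>
  void for_each_bounded(F f) const {
    for (std::size_t i = 0; i < bound; ++i) f(elems[i]);
  }
};

int main() {
  BoundedStack s;
  s.push(1); s.push(2);
  s.set_bound();          // snapshot: two entries
  s.push(3);              // added later, must not be visited
  s.for_each_bounded([](int v) { std::printf("%d\n", v); });  // prints 1 and 2
  return 0;
}
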
+ void set_oops_do_bound() { + _oops_do_bound = _index; + } + + void setEmpty() { _index = 0; clear_overflow(); } +}; + +// this will enable a variety of different statistics per GC task +#define _MARKING_STATS_ 0 +// this will enable the higher verbose levels +#define _MARKING_VERBOSE_ 0 + +#if _MARKING_STATS_ +#define statsOnly(statement) \ +do { \ + statement ; \ +} while (0) +#else // _MARKING_STATS_ +#define statsOnly(statement) \ +do { \ +} while (0) +#endif // _MARKING_STATS_ + +// Some extra guarantees that I like to also enable in optimised mode +// when debugging. If you want to enable them, comment out the assert +// macro and uncomment out the guaratee macro +// #define tmp_guarantee_CM(expr, str) guarantee(expr, str) +#define tmp_guarantee_CM(expr, str) assert(expr, str) + +typedef enum { + no_verbose = 0, // verbose turned off + stats_verbose, // only prints stats at the end of marking + low_verbose, // low verbose, mostly per region and per major event + medium_verbose, // a bit more detailed than low + high_verbose // per object verbose +} CMVerboseLevel; + + +class ConcurrentMarkThread; + +class ConcurrentMark { + friend class ConcurrentMarkThread; + friend class CMTask; + friend class CMBitMapClosure; + friend class CSMarkOopClosure; + friend class CMGlobalObjectClosure; + friend class CMRemarkTask; + friend class CMConcurrentMarkingTask; + friend class G1ParNoteEndTask; + friend class CalcLiveObjectsClosure; + +protected: + ConcurrentMarkThread* _cmThread; // the thread doing the work + G1CollectedHeap* _g1h; // the heap. + size_t _parallel_marking_threads; // the number of marking + // threads we'll use + double _sleep_factor; // how much we have to sleep, with + // respect to the work we just did, to + // meet the marking overhead goal + double _marking_task_overhead; // marking target overhead for + // a single task + + // same as the two above, but for the cleanup task + double _cleanup_sleep_factor; + double _cleanup_task_overhead; + + // Stuff related to age cohort processing. + struct ParCleanupThreadState { + char _pre[64]; + UncleanRegionList list; + char _post[64]; + }; + ParCleanupThreadState** _par_cleanup_thread_state; + + // CMS marking support structures + CMBitMap _markBitMap1; + CMBitMap _markBitMap2; + CMBitMapRO* _prevMarkBitMap; // completed mark bitmap + CMBitMap* _nextMarkBitMap; // under-construction mark bitmap + bool _at_least_one_mark_complete; + + BitMap _region_bm; + BitMap _card_bm; + + // Heap bounds + HeapWord* _heap_start; + HeapWord* _heap_end; + + // For gray objects + CMMarkStack _markStack; // Grey objects behind global finger. + CMRegionStack _regionStack; // Grey regions behind global finger. + HeapWord* volatile _finger; // the global finger, region aligned, + // always points to the end of the + // last claimed region + + // marking tasks + size_t _max_task_num; // maximum task number + size_t _active_tasks; // task num currently active + CMTask** _tasks; // task queue array (max_task_num len) + CMTaskQueueSet* _task_queues; // task queue set + ParallelTaskTerminator _terminator; // for termination + + // Two sync barriers that are used to synchronise tasks when an + // overflow occurs. The algorithm is the following. All tasks enter + // the first one to ensure that they have all stopped manipulating + // the global data structures. After they exit it, they re-initialise + // their data structures and task 0 re-initialises the global data + // structures. Then, they enter the second sync barrier. 
This + // ensure, that no task starts doing work before all data + // structures (local and global) have been re-initialised. When they + // exit it, they are free to start working again. + WorkGangBarrierSync _first_overflow_barrier_sync; + WorkGangBarrierSync _second_overflow_barrier_sync; + + + // this is set by any task, when an overflow on the global data + // structures is detected. + volatile bool _has_overflown; + // true: marking is concurrent, false: we're in remark + volatile bool _concurrent; + // set at the end of a Full GC so that marking aborts + volatile bool _has_aborted; + // used when remark aborts due to an overflow to indicate that + // another concurrent marking phase should start + volatile bool _restart_for_overflow; + + // This is true from the very start of concurrent marking until the + // point when all the tasks complete their work. It is really used + // to determine the points between the end of concurrent marking and + // time of remark. + volatile bool _concurrent_marking_in_progress; + + // verbose level + CMVerboseLevel _verbose_level; + + COTracker _cleanup_co_tracker; + + // These two fields are used to implement the optimisation that + // avoids pushing objects on the global/region stack if there are + // no collection set regions above the lowest finger. + + // This is the lowest finger (among the global and local fingers), + // which is calculated before a new collection set is chosen. + HeapWord* _min_finger; + // If this flag is true, objects/regions that are marked below the + // finger should be pushed on the stack(s). If this is flag is + // false, it is safe not to push them on the stack(s). + bool _should_gray_objects; + + // All of these times are in ms. + NumberSeq _init_times; + NumberSeq _remark_times; + NumberSeq _remark_mark_times; + NumberSeq _remark_weak_ref_times; + NumberSeq _cleanup_times; + double _total_counting_time; + double _total_rs_scrub_time; + + double* _accum_task_vtime; // accumulated task vtime + + WorkGang* _parallel_workers; + + void weakRefsWork(bool clear_all_soft_refs); + + void swapMarkBitMaps(); + + // It resets the global marking data structures, as well as the + // task local ones; should be called during initial mark. + void reset(); + // It resets all the marking data structures. + void clear_marking_state(); + + // It should be called to indicate which phase we're in (concurrent + // mark or remark) and how many threads are currently active. + void set_phase(size_t active_tasks, bool concurrent); + // We do this after we're done with marking so that the marking data + // structures are initialised to a sensible and predictable state. + void set_non_marking_state(); + + // prints all gathered CM-related statistics + void print_stats(); + + // accessor methods + size_t parallel_marking_threads() { return _parallel_marking_threads; } + double sleep_factor() { return _sleep_factor; } + double marking_task_overhead() { return _marking_task_overhead;} + double cleanup_sleep_factor() { return _cleanup_sleep_factor; } + double cleanup_task_overhead() { return _cleanup_task_overhead;} + + HeapWord* finger() { return _finger; } + bool concurrent() { return _concurrent; } + size_t active_tasks() { return _active_tasks; } + ParallelTaskTerminator* terminator() { return &_terminator; } + + // It claims the next available region to be scanned by a marking + // task. It might return NULL if the next region is empty or we have + // run out of regions. 
In the latter case, out_of_regions() + // determines whether we've really run out of regions or the task + // should call claim_region() again. This might seem a bit + // awkward. Originally, the code was written so that claim_region() + // either successfully returned with a non-empty region or there + // were no more regions to be claimed. The problem with this was + // that, in certain circumstances, it iterated over large chunks of + // the heap finding only empty regions and, while it was working, it + // was preventing the calling task to call its regular clock + // method. So, this way, each task will spend very little time in + // claim_region() and is allowed to call the regular clock method + // frequently. + HeapRegion* claim_region(int task); + + // It determines whether we've run out of regions to scan. + bool out_of_regions() { return _finger == _heap_end; } + + // Returns the task with the given id + CMTask* task(int id) { + guarantee( 0 <= id && id < (int) _active_tasks, "task id not within " + "active bounds" ); + return _tasks[id]; + } + + // Returns the task queue with the given id + CMTaskQueue* task_queue(int id) { + guarantee( 0 <= id && id < (int) _active_tasks, "task queue id not within " + "active bounds" ); + return (CMTaskQueue*) _task_queues->queue(id); + } + + // Returns the task queue set + CMTaskQueueSet* task_queues() { return _task_queues; } + + // Access / manipulation of the overflow flag which is set to + // indicate that the global stack or region stack has overflown + bool has_overflown() { return _has_overflown; } + void set_has_overflown() { _has_overflown = true; } + void clear_has_overflown() { _has_overflown = false; } + + bool has_aborted() { return _has_aborted; } + bool restart_for_overflow() { return _restart_for_overflow; } + + // Methods to enter the two overflow sync barriers + void enter_first_sync_barrier(int task_num); + void enter_second_sync_barrier(int task_num); + +public: + // Manipulation of the global mark stack. + // Notice that the first mark_stack_push is CAS-based, whereas the + // two below are Mutex-based. This is OK since the first one is only + // called during evacuation pauses and doesn't compete with the + // other two (which are called by the marking tasks during + // concurrent marking or remark). 
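The two overflow barriers declared above (enter_first_sync_barrier / enter_second_sync_barrier) give the protocol its shape: everybody stops, everybody resets local state, one task resets the shared state, then everybody resumes. A rough analogue of that ordering using C++20 std::barrier; WorkGangBarrierSync is the HotSpot class actually used, and every name in the sketch is invented:

#include <barrier>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const int num_tasks = 4;
  std::barrier first_sync(num_tasks);   // all tasks have stopped marking
  std::barrier second_sync(num_tasks);  // all re-initialisation is done
  std::vector<int> global_state(16, 1); // stands in for the global mark data

  auto task = [&](int id) {
    // ... marking work detects an overflow and aborts ...
    first_sync.arrive_and_wait();       // nobody is touching shared state now
    // every task resets its own local state here
    if (id == 0) {
      global_state.assign(global_state.size(), 0);  // task 0 resets shared state
    }
    second_sync.arrive_and_wait();      // safe to restart marking after this
    std::printf("task %d restarting\n", id);
  };

  std::vector<std::thread> threads;
  for (int i = 0; i < num_tasks; ++i) threads.emplace_back(task, i);
  for (auto& t : threads) t.join();
  return 0;
}
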
+ bool mark_stack_push(oop p) { + _markStack.par_push(p); + if (_markStack.overflow()) { + set_has_overflown(); + return false; + } + return true; + } + bool mark_stack_push(oop* arr, int n) { + _markStack.par_push_arr(arr, n); + if (_markStack.overflow()) { + set_has_overflown(); + return false; + } + return true; + } + void mark_stack_pop(oop* arr, int max, int* n) { + _markStack.par_pop_arr(arr, max, n); + } + size_t mark_stack_size() { return _markStack.size(); } + size_t partial_mark_stack_size_target() { return _markStack.maxElems()/3; } + bool mark_stack_overflow() { return _markStack.overflow(); } + bool mark_stack_empty() { return _markStack.isEmpty(); } + + // Manipulation of the region stack + bool region_stack_push(MemRegion mr) { + _regionStack.push(mr); + if (_regionStack.overflow()) { + set_has_overflown(); + return false; + } + return true; + } + MemRegion region_stack_pop() { return _regionStack.pop(); } + int region_stack_size() { return _regionStack.size(); } + bool region_stack_overflow() { return _regionStack.overflow(); } + bool region_stack_empty() { return _regionStack.isEmpty(); } + + bool concurrent_marking_in_progress() { + return _concurrent_marking_in_progress; + } + void set_concurrent_marking_in_progress() { + _concurrent_marking_in_progress = true; + } + void clear_concurrent_marking_in_progress() { + _concurrent_marking_in_progress = false; + } + + void update_accum_task_vtime(int i, double vtime) { + _accum_task_vtime[i] += vtime; + } + + double all_task_accum_vtime() { + double ret = 0.0; + for (int i = 0; i < (int)_max_task_num; ++i) + ret += _accum_task_vtime[i]; + return ret; + } + + // Attempts to steal an object from the task queues of other tasks + bool try_stealing(int task_num, int* hash_seed, oop& obj) { + return _task_queues->steal(task_num, hash_seed, obj); + } + + // It grays an object by first marking it. Then, if it's behind the + // global finger, it also pushes it on the global stack. + void deal_with_reference(oop obj); + + ConcurrentMark(ReservedSpace rs, int max_regions); + ~ConcurrentMark(); + ConcurrentMarkThread* cmThread() { return _cmThread; } + + CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; } + CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; } + + // The following three are interaction between CM and + // G1CollectedHeap + + // This notifies CM that a root during initial-mark needs to be + // grayed and it's MT-safe. Currently, we just mark it. But, in the + // future, we can experiment with pushing it on the stack and we can + // do this without changing G1CollectedHeap. + void grayRoot(oop p); + // It's used during evacuation pauses to gray a region, if + // necessary, and it's MT-safe. It assumes that the caller has + // marked any objects on that region. If _should_gray_objects is + // true and we're still doing concurrent marking, the region is + // pushed on the region stack, if it is located below the global + // finger, otherwise we do nothing. + void grayRegionIfNecessary(MemRegion mr); + // It's used during evacuation pauses to mark and, if necessary, + // gray a single object and it's MT-safe. It assumes the caller did + // not mark the object. If _should_gray_objects is true and we're + // still doing concurrent marking, the objects is pushed on the + // global stack, if it is located below the global finger, otherwise + // we do nothing. 
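deal_with_reference and the gray*IfNecessary methods share one rule: mark the object, and only push it for later scanning if it lies below the global finger, since anything at or above the finger will be reached by the normal region scan. A compressed sketch of that decision, with plain integers standing in for heap addresses and all names invented:

#include <cstdio>
#include <set>
#include <vector>

// Addresses are plain integers in this sketch; "finger" is the scan frontier.
struct MiniMarker {
  std::set<long> marked;
  std::vector<long> gray_stack;   // objects that still need explicit scanning
  long finger;                    // everything >= finger will be scanned anyway

  void deal_with_reference(long obj) {
    if (!marked.insert(obj).second) return;   // already marked: nothing to do
    if (obj < finger) {
      gray_stack.push_back(obj);  // below the frontier: would be missed, so push
    }
    // at or above the finger: the regular region scan will visit it
  }
};

int main() {
  MiniMarker m;
  m.finger = 100;
  m.deal_with_reference(42);    // pushed: below the finger
  m.deal_with_reference(150);   // only marked: the scan will get there
  std::printf("gray stack size = %zu\n", m.gray_stack.size());  // prints 1
  return 0;
}
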
+ void markAndGrayObjectIfNecessary(oop p); + + // This iterates over the bitmap of the previous marking and prints + // out all objects that are marked on the bitmap and indicates + // whether what they point to is also marked or not. + void print_prev_bitmap_reachable(); + + // Clear the next marking bitmap (will be called concurrently). + void clearNextBitmap(); + + // main CMS steps and related support + void checkpointRootsInitial(); + + // These two do the work that needs to be done before and after the + // initial root checkpoint. Since this checkpoint can be done at two + // different points (i.e. an explicit pause or piggy-backed on a + // young collection), then it's nice to be able to easily share the + // pre/post code. It might be the case that we can put everything in + // the post method. TP + void checkpointRootsInitialPre(); + void checkpointRootsInitialPost(); + + // Do concurrent phase of marking, to a tentative transitive closure. + void markFromRoots(); + + // Process all unprocessed SATB buffers. It is called at the + // beginning of an evacuation pause. + void drainAllSATBBuffers(); + + void checkpointRootsFinal(bool clear_all_soft_refs); + void checkpointRootsFinalWork(); + void calcDesiredRegions(); + void cleanup(); + void completeCleanup(); + + // Mark in the previous bitmap. NB: this is usually read-only, so use + // this carefully! + void markPrev(oop p); + void clear(oop p); + // Clears marks for all objects in the given range, for both prev and + // next bitmaps. NB: the previous bitmap is usually read-only, so use + // this carefully! + void clearRangeBothMaps(MemRegion mr); + + // Record the current top of the mark and region stacks; a + // subsequent oops_do() on the mark stack and + // invalidate_entries_into_cset() on the region stack will iterate + // only over indices valid at the time of this call. + void set_oops_do_bound() { + _markStack.set_oops_do_bound(); + _regionStack.set_oops_do_bound(); + } + // Iterate over the oops in the mark stack and all local queues. It + // also calls invalidate_entries_into_cset() on the region stack. + void oops_do(OopClosure* f); + // It is called at the end of an evacuation pause during marking so + // that CM is notified of where the new end of the heap is. It + // doesn't do anything if concurrent_marking_in_progress() is false, + // unless the force parameter is true. + void update_g1_committed(bool force = false); + + void complete_marking_in_collection_set(); + + // It indicates that a new collection set is being chosen. + void newCSet(); + // It registers a collection set heap region with CM. This is used + // to determine whether any heap regions are located above the finger. + void registerCSetRegion(HeapRegion* hr); + + // Returns "true" if at least one mark has been completed. 
+ bool at_least_one_mark_complete() { return _at_least_one_mark_complete; } + + bool isMarked(oop p) const { + assert(p != NULL && p->is_oop(), "expected an oop"); + HeapWord* addr = (HeapWord*)p; + assert(addr >= _nextMarkBitMap->startWord() || + addr < _nextMarkBitMap->endWord(), "in a region"); + + return _nextMarkBitMap->isMarked(addr); + } + + inline bool not_yet_marked(oop p) const; + + // XXX Debug code + bool containing_card_is_marked(void* p); + bool containing_cards_are_marked(void* start, void* last); + + bool isPrevMarked(oop p) const { + assert(p != NULL && p->is_oop(), "expected an oop"); + HeapWord* addr = (HeapWord*)p; + assert(addr >= _prevMarkBitMap->startWord() || + addr < _prevMarkBitMap->endWord(), "in a region"); + + return _prevMarkBitMap->isMarked(addr); + } + + inline bool do_yield_check(int worker_i = 0); + inline bool should_yield(); + + // Called to abort the marking cycle after a Full GC takes palce. + void abort(); + + void disable_co_trackers(); + + // This prints the global/local fingers. It is used for debugging. + NOT_PRODUCT(void print_finger();) + + void print_summary_info(); + + // The following indicate whether a given verbose level has been + // set. Notice that anything above stats is conditional to + // _MARKING_VERBOSE_ having been set to 1 + bool verbose_stats() + { return _verbose_level >= stats_verbose; } + bool verbose_low() + { return _MARKING_VERBOSE_ && _verbose_level >= low_verbose; } + bool verbose_medium() + { return _MARKING_VERBOSE_ && _verbose_level >= medium_verbose; } + bool verbose_high() + { return _MARKING_VERBOSE_ && _verbose_level >= high_verbose; } +}; + +// A class representing a marking task. +class CMTask : public TerminatorTerminator { +private: + enum PrivateConstants { + // the regular clock call is called once the scanned words reaches + // this limit + words_scanned_period = 12*1024, + // the regular clock call is called once the number of visited + // references reaches this limit + refs_reached_period = 384, + // initial value for the hash seed, used in the work stealing code + init_hash_seed = 17, + // how many entries will be transferred between global stack and + // local queues + global_stack_transfer_size = 16 + }; + + int _task_id; + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + CMBitMap* _nextMarkBitMap; + // the task queue of this task + CMTaskQueue* _task_queue; + // the task queue set---needed for stealing + CMTaskQueueSet* _task_queues; + // indicates whether the task has been claimed---this is only for + // debugging purposes + bool _claimed; + + // number of calls to this task + int _calls; + + // concurrent overhead over a single CPU for this task + COTracker _co_tracker; + + // when the virtual timer reaches this time, the marking step should + // exit + double _time_target_ms; + // the start time of the current marking step + double _start_time_ms; + + // the oop closure used for iterations over oops + OopClosure* _oop_closure; + + // the region this task is scanning, NULL if we're not scanning any + HeapRegion* _curr_region; + // the local finger of this task, NULL if we're not scanning a region + HeapWord* _finger; + // limit of the region this task is scanning, NULL if we're not scanning one + HeapWord* _region_limit; + + // This is used only when we scan regions popped from the region + // stack. It records what the last object on such a region we + // scanned was. It is used to ensure that, if we abort region + // iteration, we do not rescan the first part of the region. 
This + // should be NULL when we're not scanning a region from the region + // stack. + HeapWord* _region_finger; + + // the number of words this task has scanned + size_t _words_scanned; + // When _words_scanned reaches this limit, the regular clock is + // called. Notice that this might be decreased under certain + // circumstances (i.e. when we believe that we did an expensive + // operation). + size_t _words_scanned_limit; + // the initial value of _words_scanned_limit (i.e. what it was + // before it was decreased). + size_t _real_words_scanned_limit; + + // the number of references this task has visited + size_t _refs_reached; + // When _refs_reached reaches this limit, the regular clock is + // called. Notice this this might be decreased under certain + // circumstances (i.e. when we believe that we did an expensive + // operation). + size_t _refs_reached_limit; + // the initial value of _refs_reached_limit (i.e. what it was before + // it was decreased). + size_t _real_refs_reached_limit; + + // used by the work stealing stuff + int _hash_seed; + // if this is true, then the task has aborted for some reason + bool _has_aborted; + // set when the task aborts because it has met its time quota + bool _has_aborted_timed_out; + // true when we're draining SATB buffers; this avoids the task + // aborting due to SATB buffers being available (as we're already + // dealing with them) + bool _draining_satb_buffers; + + // number sequence of past step times + NumberSeq _step_times_ms; + // elapsed time of this task + double _elapsed_time_ms; + // termination time of this task + double _termination_time_ms; + // when this task got into the termination protocol + double _termination_start_time_ms; + + // true when the task is during a concurrent phase, false when it is + // in the remark phase (so, in the latter case, we do not have to + // check all the things that we have to check during the concurrent + // phase, i.e. SATB buffer availability...) 
+ bool _concurrent; + + TruncatedSeq _marking_step_diffs_ms; + + // LOTS of statistics related with this task +#if _MARKING_STATS_ + NumberSeq _all_clock_intervals_ms; + double _interval_start_time_ms; + + int _aborted; + int _aborted_overflow; + int _aborted_cm_aborted; + int _aborted_yield; + int _aborted_timed_out; + int _aborted_satb; + int _aborted_termination; + + int _steal_attempts; + int _steals; + + int _clock_due_to_marking; + int _clock_due_to_scanning; + + int _local_pushes; + int _local_pops; + int _local_max_size; + int _objs_scanned; + + int _global_pushes; + int _global_pops; + int _global_max_size; + + int _global_transfers_to; + int _global_transfers_from; + + int _region_stack_pops; + + int _regions_claimed; + int _objs_found_on_bitmap; + + int _satb_buffers_processed; +#endif // _MARKING_STATS_ + + // it updates the local fields after this task has claimed + // a new region to scan + void setup_for_region(HeapRegion* hr); + // it brings up-to-date the limit of the region + void update_region_limit(); + // it resets the local fields after a task has finished scanning a + // region + void giveup_current_region(); + + // called when either the words scanned or the refs visited limit + // has been reached + void reached_limit(); + // recalculates the words scanned and refs visited limits + void recalculate_limits(); + // decreases the words scanned and refs visited limits when we reach + // an expensive operation + void decrease_limits(); + // it checks whether the words scanned or refs visited reached their + // respective limit and calls reached_limit() if they have + void check_limits() { + if (_words_scanned >= _words_scanned_limit || + _refs_reached >= _refs_reached_limit) + reached_limit(); + } + // this is supposed to be called regularly during a marking step as + // it checks a bunch of conditions that might cause the marking step + // to abort + void regular_clock_call(); + bool concurrent() { return _concurrent; } + +public: + // It resets the task; it should be called right at the beginning of + // a marking phase. + void reset(CMBitMap* _nextMarkBitMap); + // it clears all the fields that correspond to a claimed region. + void clear_region_fields(); + + void set_concurrent(bool concurrent) { _concurrent = concurrent; } + + void enable_co_tracker() { + guarantee( !_co_tracker.enabled(), "invariant" ); + _co_tracker.enable(); + } + void disable_co_tracker() { + guarantee( _co_tracker.enabled(), "invariant" ); + _co_tracker.disable(); + } + bool co_tracker_enabled() { + return _co_tracker.enabled(); + } + void reset_co_tracker(double starting_conc_overhead = 0.0) { + _co_tracker.reset(starting_conc_overhead); + } + void start_co_tracker() { + _co_tracker.start(); + } + void update_co_tracker(bool force_end = false) { + _co_tracker.update(force_end); + } + + // The main method of this class which performs a marking step + // trying not to exceed the given duration. However, it might exit + // prematurely, according to some conditions (i.e. SATB buffers are + // available for processing). + void do_marking_step(double target_ms); + + // These two calls start and stop the timer + void record_start_time() { + _elapsed_time_ms = os::elapsedTime() * 1000.0; + } + void record_end_time() { + _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms; + } + + // returns the task ID + int task_id() { return _task_id; } + + // From TerminatorTerminator. It determines whether this task should + // exit the termination protocol after it's entered it. 
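check_limits() is the cheap hot-path test: counters for words scanned and references visited are compared against their limits, and only when a limit is hit does the task fall into the more expensive regular clock call that decides whether to abort the step. A standalone sketch of that pattern (names invented; the real limits are also recalculated and sometimes decreased, which the sketch omits):

#include <cstddef>
#include <cstdio>

struct MiniTask {
  std::size_t words_scanned = 0, words_limit = 12 * 1024;
  std::size_t refs_reached = 0, refs_limit = 384;
  int clock_calls = 0;

  void regular_clock_call() {
    // In the real code this checks elapsed time, SATB buffers, yields, etc.
    ++clock_calls;
    words_limit = words_scanned + 12 * 1024;  // set up the next period
    refs_limit  = refs_reached  + 384;
  }

  // Cheap check performed after every object scanned / reference visited.
  void check_limits() {
    if (words_scanned >= words_limit || refs_reached >= refs_limit)
      regular_clock_call();
  }

  void scan_object(std::size_t size_in_words) {
    words_scanned += size_in_words;
    check_limits();
  }
};

int main() {
  MiniTask t;
  for (int i = 0; i < 1000; ++i) t.scan_object(64);  // 64000 words scanned
  std::printf("regular clock invoked %d times\n", t.clock_calls);
  return 0;
}
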
+ virtual bool should_exit_termination(); + + HeapWord* finger() { return _finger; } + + bool has_aborted() { return _has_aborted; } + void set_has_aborted() { _has_aborted = true; } + void clear_has_aborted() { _has_aborted = false; } + bool claimed() { return _claimed; } + + void set_oop_closure(OopClosure* oop_closure) { + _oop_closure = oop_closure; + } + + // It grays the object by marking it and, if necessary, pushing it + // on the local queue + void deal_with_reference(oop obj); + + // It scans an object and visits its children. + void scan_object(oop obj) { + tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj), + "invariant" ); + + if (_cm->verbose_high()) + gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, + _task_id, (void*) obj); + + size_t obj_size = obj->size(); + _words_scanned += obj_size; + + obj->oop_iterate(_oop_closure); + statsOnly( ++_objs_scanned ); + check_limits(); + } + + // It pushes an object on the local queue. + void push(oop obj); + + // These two move entries to/from the global stack. + void move_entries_to_global_stack(); + void get_entries_from_global_stack(); + + // It pops and scans objects from the local queue. If partially is + // true, then it stops when the queue size is of a given limit. If + // partially is false, then it stops when the queue is empty. + void drain_local_queue(bool partially); + // It moves entries from the global stack to the local queue and + // drains the local queue. If partially is true, then it stops when + // both the global stack and the local queue reach a given size. If + // partially if false, it tries to empty them totally. + void drain_global_stack(bool partially); + // It keeps picking SATB buffers and processing them until no SATB + // buffers are available. + void drain_satb_buffers(); + // It keeps popping regions from the region stack and processing + // them until the region stack is empty. + void drain_region_stack(BitMapClosure* closure); + + // moves the local finger to a new location + inline void move_finger_to(HeapWord* new_finger) { + tmp_guarantee_CM( new_finger >= _finger && new_finger < _region_limit, + "invariant" ); + _finger = new_finger; + } + + // moves the region finger to a new location + inline void move_region_finger_to(HeapWord* new_finger) { + tmp_guarantee_CM( new_finger < _cm->finger(), "invariant" ); + _region_finger = new_finger; + } + + CMTask(int task_num, ConcurrentMark *cm, + CMTaskQueue* task_queue, CMTaskQueueSet* task_queues); + + // it prints statistics associated with this task + void print_stats(); + +#if _MARKING_STATS_ + void increase_objs_found_on_bitmap() { ++_objs_found_on_bitmap; } +#endif // _MARKING_STATS_ +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,336 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentMarkThread.cpp.incl" + +// ======= Concurrent Mark Thread ======== + +// The CM thread is created when the G1 garbage collector is used + +SurrogateLockerThread* + ConcurrentMarkThread::_slt = NULL; + +ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) : + ConcurrentGCThread(), + _cm(cm), + _started(false), + _in_progress(false), + _vtime_accum(0.0), + _vtime_mark_accum(0.0), + _vtime_count_accum(0.0) +{ + create_and_start(); +} + +class CMCheckpointRootsInitialClosure: public VoidClosure { + + ConcurrentMark* _cm; +public: + + CMCheckpointRootsInitialClosure(ConcurrentMark* cm) : + _cm(cm) {} + + void do_void(){ + _cm->checkpointRootsInitial(); + } +}; + +class CMCheckpointRootsFinalClosure: public VoidClosure { + + ConcurrentMark* _cm; +public: + + CMCheckpointRootsFinalClosure(ConcurrentMark* cm) : + _cm(cm) {} + + void do_void(){ + _cm->checkpointRootsFinal(false); // !clear_all_soft_refs + } +}; + +class CMCleanUp: public VoidClosure { + ConcurrentMark* _cm; +public: + + CMCleanUp(ConcurrentMark* cm) : + _cm(cm) {} + + void do_void(){ + _cm->cleanup(); + } +}; + + + +void ConcurrentMarkThread::run() { + initialize_in_thread(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + G1CollectorPolicy* g1_policy = g1->g1_policy(); + G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker(); + Thread *current_thread = Thread::current(); + + while (!_should_terminate) { + // wait until started is set. + sleepBeforeNextCycle(); + { + ResourceMark rm; + HandleMark hm; + double cycle_start = os::elapsedVTime(); + double mark_start_sec = os::elapsedTime(); + char verbose_str[128]; + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + tty->print_cr("[GC concurrent-mark-start]"); + } + + if (!g1_policy->in_young_gc_mode()) { + // this ensures the flag is not set if we bail out of the marking + // cycle; normally the flag is cleared immediately after cleanup + g1->set_marking_complete(); + + if (g1_policy->adaptive_young_list_length()) { + double now = os::elapsedTime(); + double init_prediction_ms = g1_policy->predict_init_time_ms(); + jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms); + os::sleep(current_thread, sleep_time_ms, false); + } + + // We don't have to skip here if we've been asked to restart, because + // in the worst case we just enqueue a new VM operation to start a + // marking. 
Note that the init operation resets has_aborted() + CMCheckpointRootsInitialClosure init_cl(_cm); + strcpy(verbose_str, "GC initial-mark"); + VM_CGC_Operation op(&init_cl, verbose_str); + VMThread::execute(&op); + } + + int iter = 0; + do { + iter++; + if (!cm()->has_aborted()) { + _cm->markFromRoots(); + } else { + if (TraceConcurrentMark) + gclog_or_tty->print_cr("CM-skip-mark-from-roots"); + } + + double mark_end_time = os::elapsedVTime(); + double mark_end_sec = os::elapsedTime(); + _vtime_mark_accum += (mark_end_time - cycle_start); + if (!cm()->has_aborted()) { + if (g1_policy->adaptive_young_list_length()) { + double now = os::elapsedTime(); + double remark_prediction_ms = g1_policy->predict_remark_time_ms(); + jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms); + os::sleep(current_thread, sleep_time_ms, false); + } + + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]", + mark_end_sec - mark_start_sec); + } + + CMCheckpointRootsFinalClosure final_cl(_cm); + sprintf(verbose_str, "GC remark"); + VM_CGC_Operation op(&final_cl, verbose_str); + VMThread::execute(&op); + } else { + if (TraceConcurrentMark) + gclog_or_tty->print_cr("CM-skip-remark"); + } + if (cm()->restart_for_overflow() && + G1TraceMarkStackOverflow) { + gclog_or_tty->print_cr("Restarting conc marking because of MS overflow " + "in remark (restart #%d).", iter); + } + + if (cm()->restart_for_overflow()) { + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]"); + } + } + } while (cm()->restart_for_overflow()); + double counting_start_time = os::elapsedVTime(); + + // YSR: These look dubious (i.e. redundant) !!! FIX ME + slt()->manipulatePLL(SurrogateLockerThread::acquirePLL); + slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL); + + if (!cm()->has_aborted()) { + double count_start_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-count-start]"); + } + + _sts.join(); + _cm->calcDesiredRegions(); + _sts.leave(); + + if (!cm()->has_aborted()) { + double count_end_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]", + count_end_sec - count_start_sec); + } + } + } else { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-end-game"); + } + double end_time = os::elapsedVTime(); + _vtime_count_accum += (end_time - counting_start_time); + // Update the total virtual time before doing this, since it will try + // to measure it to get the vtime for this marking. We purposely + // neglect the presumably-short "completeCleanup" phase here. 
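Stripped of the GC plumbing, the concurrent phase above is a retry loop: mark from roots, run the remark pause, and if remark raised restart_for_overflow() (the global structures overflowed), start another round of marking. A sketch of just that control flow; every name below is a stand-in, not the HotSpot API:

#include <cstdio>

struct MiniCM {
  int rounds_left_with_overflow = 0;   // pretend the first N rounds overflow
  bool restart_for_overflow = false;

  void mark_from_roots() { /* concurrent marking work */ }

  void remark() {
    // The remark pause detects whether the mark/region stacks overflowed.
    restart_for_overflow = (rounds_left_with_overflow-- > 0);
  }
};

int main() {
  MiniCM cm;
  cm.rounds_left_with_overflow = 2;
  int iter = 0;
  do {
    ++iter;
    cm.mark_from_roots();
    cm.remark();
    if (cm.restart_for_overflow)
      std::printf("restarting marking because of overflow (restart #%d)\n", iter);
  } while (cm.restart_for_overflow);
  std::printf("marking finished after %d iteration(s)\n", iter);
  return 0;
}
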
+ _vtime_accum = (end_time - _vtime_start); + if (!cm()->has_aborted()) { + if (g1_policy->adaptive_young_list_length()) { + double now = os::elapsedTime(); + double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms(); + jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms); + os::sleep(current_thread, sleep_time_ms, false); + } + + CMCleanUp cl_cl(_cm); + sprintf(verbose_str, "GC cleanup"); + VM_CGC_Operation op(&cl_cl, verbose_str); + VMThread::execute(&op); + } else { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-cleanup"); + G1CollectedHeap::heap()->set_marking_complete(); + } + + if (!cm()->has_aborted()) { + double cleanup_start_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-cleanup-start]"); + } + + // Now do the remainder of the cleanup operation. + _sts.join(); + _cm->completeCleanup(); + if (!cm()->has_aborted()) { + g1_policy->record_concurrent_mark_cleanup_completed(); + + double cleanup_end_sec = os::elapsedTime(); + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]", + cleanup_end_sec - cleanup_start_sec); + } + } + _sts.leave(); + } + // We're done: no more unclean regions coming. + G1CollectedHeap::heap()->set_unclean_regions_coming(false); + + if (cm()->has_aborted()) { + if (PrintGC) { + gclog_or_tty->date_stamp(PrintGCDateStamps); + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print_cr("[GC concurrent-mark-abort]"); + } + } + + _sts.join(); + _cm->disable_co_trackers(); + _sts.leave(); + + // we now want to allow clearing of the marking bitmap to be + // suspended by a collection pause. + _sts.join(); + _cm->clearNextBitmap(); + _sts.leave(); + } + } + assert(_should_terminate, "just checking"); + + terminate(); +} + + +void ConcurrentMarkThread::yield() { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield"); + _sts.yield("Concurrent Mark"); + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield-end"); +} + +void ConcurrentMarkThread::stop() { + // it is ok to take late safepoints here, if needed + MutexLockerEx mu(Terminator_lock); + _should_terminate = true; + while (!_has_terminated) { + Terminator_lock->wait(); + } + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-stop"); +} + +void ConcurrentMarkThread::print() { + gclog_or_tty->print("\"Concurrent Mark GC Thread\" "); + Thread::print(); + gclog_or_tty->cr(); +} + +void ConcurrentMarkThread::sleepBeforeNextCycle() { + clear_in_progress(); + // We join here because we don't want to do the "shouldConcurrentMark()" + // below while the world is otherwise stopped. + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + while (!started()) { + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-sleeping"); + CGC_lock->wait(Mutex::_no_safepoint_check_flag); + } + set_in_progress(); + clear_started(); + if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting"); + + return; +} + +// Note: this method, although exported by the ConcurrentMarkSweepThread, +// which is a non-JavaThread, can only be called by a JavaThread. +// Currently this is done at vm creation time (post-vm-init) by the +// main/Primordial (Java)Thread. +// XXX Consider changing this in the future to allow the CMS thread +// itself to create this thread? 
+void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) { + assert(_slt == NULL, "SLT already created"); + _slt = SurrogateLockerThread::make(THREAD); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,84 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The Concurrent Mark GC Thread (could be several in the future). +// This is copied from the Concurrent Mark Sweep GC Thread +// Still under construction. + +class ConcurrentMark; + +class ConcurrentMarkThread: public ConcurrentGCThread { + friend class VMStructs; + + double _vtime_start; // Initial virtual time. + double _vtime_accum; // Accumulated virtual time. + + double _vtime_mark_accum; + double _vtime_count_accum; + + public: + virtual void run(); + + private: + ConcurrentMark* _cm; + bool _started; + bool _in_progress; + + void sleepBeforeNextCycle(); + + static SurrogateLockerThread* _slt; + + public: + // Constructor + ConcurrentMarkThread(ConcurrentMark* cm); + + static void makeSurrogateLockerThread(TRAPS); + static SurrogateLockerThread* slt() { return _slt; } + + // Printing + void print(); + + // Total virtual time so far. + double vtime_accum(); + // Marking virtual time so far + double vtime_mark_accum(); + // Counting virtual time so far. + double vtime_count_accum() { return _vtime_count_accum; } + + ConcurrentMark* cm() { return _cm; } + + void set_started() { _started = true; } + void clear_started() { _started = false; } + bool started() { return _started; } + + void set_in_progress() { _in_progress = true; } + void clear_in_progress() { _in_progress = false; } + bool in_progress() { return _in_progress; } + + // Yield for GC + void yield(); + + // shutdown + static void stop(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,33 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + + // Total virtual time so far. +inline double ConcurrentMarkThread::vtime_accum() { + return _vtime_accum + _cm->all_task_accum_vtime(); +} + +// Marking virtual time so far +inline double ConcurrentMarkThread::vtime_mark_accum() { + return _vtime_mark_accum + _cm->all_task_accum_vtime(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentZFThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,191 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_concurrentZFThread.cpp.incl" + +// ======= Concurrent Zero-Fill Thread ======== + +// The CM thread is created when the G1 garbage collector is used + +int ConcurrentZFThread::_region_allocs = 0; +int ConcurrentZFThread::_sync_zfs = 0; +int ConcurrentZFThread::_zf_waits = 0; +int ConcurrentZFThread::_regions_filled = 0; + +ConcurrentZFThread::ConcurrentZFThread() : + ConcurrentGCThread(), + _co_tracker(G1ZFGroup) +{ + create_and_start(); +} + +void ConcurrentZFThread::wait_for_ZF_completed(HeapRegion* hr) { + assert(ZF_mon->owned_by_self(), "Precondition."); + note_zf_wait(); + while (hr->zero_fill_state() == HeapRegion::ZeroFilling) { + ZF_mon->wait(Mutex::_no_safepoint_check_flag); + } +} + +void ConcurrentZFThread::processHeapRegion(HeapRegion* hr) { + assert(!Universe::heap()->is_gc_active(), + "This should not happen during GC."); + assert(hr != NULL, "Precondition"); + // These are unlocked reads, but if this test is successful, then no + // other thread will attempt this zero filling. 
Only a GC thread can + // modify the ZF state of a region whose state is zero-filling, and this + // should only happen while the ZF thread is locking out GC. + if (hr->zero_fill_state() == HeapRegion::ZeroFilling + && hr->zero_filler() == Thread::current()) { + assert(hr->top() == hr->bottom(), "better be empty!"); + assert(!hr->isHumongous(), "Only free regions on unclean list."); + Copy::fill_to_words(hr->bottom(), hr->capacity()/HeapWordSize); + note_region_filled(); + } +} + +void ConcurrentZFThread::run() { + initialize_in_thread(); + Thread* thr_self = Thread::current(); + _vtime_start = os::elapsedVTime(); + wait_for_universe_init(); + _co_tracker.enable(); + _co_tracker.start(); + + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + _sts.join(); + while (!_should_terminate) { + _sts.leave(); + + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + + // This local variable will hold a region being zero-filled. This + // region will neither be on the unclean or zero-filled lists, and + // will not be available for allocation; thus, we might have an + // allocation fail, causing a full GC, because of this, but this is a + // price we will pay. (In future, we might want to make the fact + // that there's a region being zero-filled apparent to the G1 heap, + // which could then wait for it in this extreme case...) + HeapRegion* to_fill; + + while (!g1->should_zf() + || (to_fill = g1->pop_unclean_region_list_locked()) == NULL) + ZF_mon->wait(Mutex::_no_safepoint_check_flag); + while (to_fill->zero_fill_state() == HeapRegion::ZeroFilling) + ZF_mon->wait(Mutex::_no_safepoint_check_flag); + + // So now to_fill is non-NULL and is not ZeroFilling. It might be + // Allocated or ZeroFilled. (The latter could happen if this thread + // starts the zero-filling of a region, but a GC intervenes and + // pushes new regions needing on the front of the filling on the + // front of the list.) 
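The zero-fill loop that follows uses a common shape: claim the region's state as "in progress" under ZF_mon, release the monitor for the long fill, then re-acquire it and re-check that this thread is still the designated filler before publishing the region as zero-filled, since a GC may have intervened. A generic sketch of the same shape with std::mutex; all names are invented and HeapRegion's actual state machine has more states:

#include <algorithm>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

enum class FillState { NotFilled, Filling, Filled };

struct Region {
  FillState state = FillState::NotFilled;
  std::thread::id filler;        // which thread claimed the fill
  std::vector<char> payload = std::vector<char>(1 << 20);
};

std::mutex region_mon;

void fill_region(Region& r) {
  std::unique_lock<std::mutex> lk(region_mon);
  if (r.state != FillState::NotFilled) return;
  r.state = FillState::Filling;          // claim the work under the lock
  r.filler = std::this_thread::get_id();

  lk.unlock();                           // do the long operation unlocked
  std::fill(r.payload.begin(), r.payload.end(), 0);
  lk.lock();

  // Re-check: someone (e.g. a GC) may have changed the state meanwhile.
  if (r.state == FillState::Filling && r.filler == std::this_thread::get_id())
    r.state = FillState::Filled;
}

int main() {
  Region r;
  fill_region(r);
  std::printf("filled: %s\n", r.state == FillState::Filled ? "yes" : "no");
  return 0;
}
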
+ + switch (to_fill->zero_fill_state()) { + case HeapRegion::Allocated: + to_fill = NULL; + break; + + case HeapRegion::NotZeroFilled: + to_fill->set_zero_fill_in_progress(thr_self); + + ZF_mon->unlock(); + _sts.join(); + processHeapRegion(to_fill); + _sts.leave(); + ZF_mon->lock_without_safepoint_check(); + + if (to_fill->zero_fill_state() == HeapRegion::ZeroFilling + && to_fill->zero_filler() == thr_self) { + to_fill->set_zero_fill_complete(); + (void)g1->put_free_region_on_list_locked(to_fill); + } + break; + + case HeapRegion::ZeroFilled: + (void)g1->put_free_region_on_list_locked(to_fill); + break; + + case HeapRegion::ZeroFilling: + ShouldNotReachHere(); + break; + } + } + _vtime_accum = (os::elapsedVTime() - _vtime_start); + _sts.join(); + + _co_tracker.update(); + } + _co_tracker.update(false); + _sts.leave(); + + assert(_should_terminate, "just checking"); + terminate(); +} + +bool ConcurrentZFThread::offer_yield() { + if (_sts.should_yield()) { + _sts.yield("Concurrent ZF"); + return true; + } else { + return false; + } +} + +void ConcurrentZFThread::stop() { + // it is ok to take late safepoints here, if needed + MutexLockerEx mu(Terminator_lock); + _should_terminate = true; + while (!_has_terminated) { + Terminator_lock->wait(); + } +} + +void ConcurrentZFThread::print() { + gclog_or_tty->print("\"Concurrent ZF Thread\" "); + Thread::print(); + gclog_or_tty->cr(); +} + + +double ConcurrentZFThread::_vtime_accum; + +void ConcurrentZFThread::print_summary_info() { + gclog_or_tty->print("\nConcurrent Zero-Filling:\n"); + gclog_or_tty->print(" Filled %d regions, used %5.2fs.\n", + _regions_filled, + vtime_accum()); + gclog_or_tty->print(" Of %d region allocs, %d (%5.2f%%) required sync ZF,\n", + _region_allocs, _sync_zfs, + (_region_allocs > 0 ? + (float)_sync_zfs/(float)_region_allocs*100.0 : + 0.0)); + gclog_or_tty->print(" and %d (%5.2f%%) required a ZF wait.\n", + _zf_waits, + (_region_allocs > 0 ? + (float)_zf_waits/(float)_region_allocs*100.0 : + 0.0)); + +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/concurrentZFThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,85 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The Concurrent ZF Thread. Performs concurrent zero-filling. 
+ +class ConcurrentZFThread: public ConcurrentGCThread { + friend class VMStructs; + friend class ZeroFillRegionClosure; + + private: + + // Zero fill the heap region. + void processHeapRegion(HeapRegion* r); + + // Stats + // Allocation (protected by heap lock). + static int _region_allocs; // Number of regions allocated + static int _sync_zfs; // Synchronous zero-fills + + static int _zf_waits; // Wait for conc zero-fill completion. + + // Number of regions CFZ thread fills. + static int _regions_filled; + + COTracker _co_tracker; + + double _vtime_start; // Initial virtual time. + + // These are static because the "print_summary_info" method is, and + // it currently assumes there is only one ZF thread. We'll change when + // we need to. + static double _vtime_accum; // Initial virtual time. + static double vtime_accum() { return _vtime_accum; } + + // Offer yield for GC. Returns true if yield occurred. + bool offer_yield(); + + public: + // Constructor + ConcurrentZFThread(); + + // Main loop. + virtual void run(); + + // Printing + void print(); + + // Waits until "r" has been zero-filled. Requires caller to hold the + // ZF_mon. + static void wait_for_ZF_completed(HeapRegion* r); + + // Get or clear the current unclean region. Should be done + // while holding the ZF_needed_mon lock. + + // shutdown + static void stop(); + + // Stats + static void note_region_alloc() {_region_allocs++; } + static void note_sync_zfs() { _sync_zfs++; } + static void note_zf_wait() { _zf_waits++; } + static void note_region_filled() { _regions_filled++; } + + static void print_summary_info(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,307 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_dirtyCardQueue.cpp.incl" + +bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl, + bool consume, + size_t worker_i) { + bool res = true; + if (_buf != NULL) { + res = apply_closure_to_buffer(cl, _buf, _index, _sz, + consume, + (int) worker_i); + if (res && consume) _index = _sz; + } + return res; +} + +bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl, + void** buf, + size_t index, size_t sz, + bool consume, + int worker_i) { + if (cl == NULL) return true; + for (size_t i = index; i < sz; i += oopSize) { + int ind = byte_index_to_index((int)i); + jbyte* card_ptr = (jbyte*)buf[ind]; + if (card_ptr != NULL) { + // Set the entry to null, so we don't do it again (via the test + // above) if we reconsider this buffer. + if (consume) buf[ind] = NULL; + if (!cl->do_card_ptr(card_ptr, worker_i)) return false; + } + } + return true; +} + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + +DirtyCardQueueSet::DirtyCardQueueSet() : + PtrQueueSet(true /*notify_when_complete*/), + _closure(NULL), + _shared_dirty_card_queue(this, true /*perm*/), + _free_ids(NULL), + _processed_buffers_mut(0), _processed_buffers_rs_thread(0) +{ + _all_active = true; +} + +size_t DirtyCardQueueSet::num_par_ids() { + return MAX2(ParallelGCThreads, (size_t)2); +} + + +void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue, + Mutex* lock) { + PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + set_buffer_size(DCQBarrierQueueBufferSize); + set_process_completed_threshold(DCQBarrierProcessCompletedThreshold); + + _shared_dirty_card_queue.set_lock(lock); + _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon); + bool b = _free_ids->claim_perm_id(0); + guarantee(b, "Must reserve id zero for concurrent refinement thread."); +} + +void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) { + t->dirty_card_queue().handle_zero_index(); +} + +void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) { + _closure = closure; +} + +void DirtyCardQueueSet::iterate_closure_all_threads(bool consume, + size_t worker_i) { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + for(JavaThread* t = Threads::first(); t; t = t->next()) { + bool b = t->dirty_card_queue().apply_closure(_closure, consume); + guarantee(b, "Should not be interrupted."); + } + bool b = shared_dirty_card_queue()->apply_closure(_closure, + consume, + worker_i); + guarantee(b, "Should not be interrupted."); +} + +bool DirtyCardQueueSet::mut_process_buffer(void** buf) { + + // Used to determine if we had already claimed a par_id + // before entering this method. + bool already_claimed = false; + + // We grab the current JavaThread. + JavaThread* thread = JavaThread::current(); + + // We get the the number of any par_id that this thread + // might have already claimed. + int worker_i = thread->get_claimed_par_id(); + + // If worker_i is not -1 then the thread has already claimed + // a par_id. 
We make note of it using the already_claimed value + if (worker_i != -1) { + already_claimed = true; + } else { + + // Otherwise we need to claim a par id + worker_i = _free_ids->claim_par_id(); + + // And store the par_id value in the thread + thread->set_claimed_par_id(worker_i); + } + + bool b = false; + if (worker_i != -1) { + b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0, + _sz, true, worker_i); + if (b) Atomic::inc(&_processed_buffers_mut); + + // If we had not claimed an id before entering the method + // then we must release the id. + if (!already_claimed) { + + // we release the id + _free_ids->release_par_id(worker_i); + + // and set the claimed_id in the thread to -1 + thread->set_claimed_par_id(-1); + } + } + return b; +} + +DirtyCardQueueSet::CompletedBufferNode* +DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) { + CompletedBufferNode* nd = NULL; + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + + if ((int)_n_completed_buffers <= stop_at) { + _process_completed = false; + return NULL; + } + + if (_completed_buffers_head != NULL) { + nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + if (_completed_buffers_head == NULL) + _completed_buffers_tail = NULL; + _n_completed_buffers--; + } + debug_only(assert_completed_buffer_list_len_correct_locked()); + return nd; +} + +// We only do this in contexts where there is no concurrent enqueueing. +DirtyCardQueueSet::CompletedBufferNode* +DirtyCardQueueSet::get_completed_buffer_CAS() { + CompletedBufferNode* nd = _completed_buffers_head; + + while (nd != NULL) { + CompletedBufferNode* next = nd->next; + CompletedBufferNode* result = + (CompletedBufferNode*)Atomic::cmpxchg_ptr(next, + &_completed_buffers_head, + nd); + if (result == nd) { + return result; + } else { + nd = _completed_buffers_head; + } + } + assert(_completed_buffers_head == NULL, "Loop post"); + _completed_buffers_tail = NULL; + return NULL; +} + +bool DirtyCardQueueSet:: +apply_closure_to_completed_buffer_helper(int worker_i, + CompletedBufferNode* nd) { + if (nd != NULL) { + bool b = + DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, + nd->index, _sz, + true, worker_i); + void** buf = nd->buf; + delete nd; + if (b) { + deallocate_buffer(buf); + return true; // In normal case, go on to next buffer. 
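mut_process_buffer() above claims a parallel worker id only when the running thread does not already hold one, and releases it only in the frame that actually claimed it, so re-entrant calls reuse the outer claim. A sketch of that discipline in isolation; HypotheticalFreeIdSet and the thread-local slot are assumptions standing in for FreeIdSet and the thread's claimed par_id, not real APIs.

#include <cstddef>

struct HypotheticalFreeIdSet {
  virtual ~HypotheticalFreeIdSet() {}
  virtual int  claim_par_id() = 0;           // returns an id, or -1 if none free
  virtual void release_par_id(int id) = 0;
};

thread_local int claimed_par_id = -1;        // per-thread claimed id, -1 = none

template <typename Work>
bool with_par_id(HypotheticalFreeIdSet& ids, Work work) {
  bool already_claimed = (claimed_par_id != -1);
  int worker_i = claimed_par_id;
  if (!already_claimed) {
    worker_i = ids.claim_par_id();           // claim a fresh id for this call
    claimed_par_id = worker_i;
  }
  bool processed = false;
  if (worker_i != -1) {
    processed = work(worker_i);              // e.g. apply the card closure
    if (!already_claimed) {                  // only the claiming frame releases
      ids.release_par_id(worker_i);
      claimed_par_id = -1;
    }
  }
  return processed;
}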
+ } else { + enqueue_complete_buffer(buf, nd->index, true); + return false; + } + } else { + return false; + } +} + +bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, + int stop_at, + bool with_CAS) +{ + CompletedBufferNode* nd = NULL; + if (with_CAS) { + guarantee(stop_at == 0, "Precondition"); + nd = get_completed_buffer_CAS(); + } else { + nd = get_completed_buffer_lock(stop_at); + } + bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); + if (res) _processed_buffers_rs_thread++; + return res; +} + +void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { + CompletedBufferNode* nd = _completed_buffers_head; + while (nd != NULL) { + bool b = + DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz, + false); + guarantee(b, "Should not stop early."); + nd = nd->next; + } +} + +void DirtyCardQueueSet::abandon_logs() { + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + CompletedBufferNode* buffers_to_delete = NULL; + { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + while (_completed_buffers_head != NULL) { + CompletedBufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + nd->next = buffers_to_delete; + buffers_to_delete = nd; + } + _n_completed_buffers = 0; + _completed_buffers_tail = NULL; + debug_only(assert_completed_buffer_list_len_correct_locked()); + } + while (buffers_to_delete != NULL) { + CompletedBufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next; + deallocate_buffer(nd->buf); + delete nd; + } + // Since abandon is done only at safepoints, we can safely manipulate + // these queues. + for (JavaThread* t = Threads::first(); t; t = t->next()) { + t->dirty_card_queue().reset(); + } + shared_dirty_card_queue()->reset(); +} + + +void DirtyCardQueueSet::concatenate_logs() { + // Iterate over all the threads, if we find a partial log add it to + // the global list of logs. Temporarily turn off the limit on the number + // of outstanding buffers. + int save_max_completed_queue = _max_completed_queue; + _max_completed_queue = max_jint; + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + for (JavaThread* t = Threads::first(); t; t = t->next()) { + DirtyCardQueue& dcq = t->dirty_card_queue(); + if (dcq.size() != 0) { + void **buf = t->dirty_card_queue().get_buf(); + // We must NULL out the unused entries, then enqueue. + for (size_t i = 0; i < t->dirty_card_queue().get_index(); i += oopSize) { + buf[PtrQueue::byte_index_to_index((int)i)] = NULL; + } + enqueue_complete_buffer(dcq.get_buf(), dcq.get_index()); + dcq.reinitialize(); + } + } + if (_shared_dirty_card_queue.size() != 0) { + enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(), + _shared_dirty_card_queue.get_index()); + _shared_dirty_card_queue.reinitialize(); + } + // Restore the completed buffer queue limit. + _max_completed_queue = save_max_completed_queue; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,152 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class FreeIdSet; + +// A closure class for processing card table entries. Note that we don't +// require these closure objects to be stack-allocated. +class CardTableEntryClosure: public CHeapObj { +public: + // Process the card whose card table entry is "card_ptr". If returns + // "false", terminate the iteration early. + virtual bool do_card_ptr(jbyte* card_ptr, int worker_i = 0) = 0; +}; + +// A ptrQueue whose elements are "oops", pointers to object heads. +class DirtyCardQueue: public PtrQueue { +public: + DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) : + PtrQueue(qset_, perm) + { + // Dirty card queues are always active. + _active = true; + } + // Apply the closure to all elements, and reset the index to make the + // buffer empty. If a closure application returns "false", return + // "false" immediately, halting the iteration. If "consume" is true, + // deletes processed entries from logs. + bool apply_closure(CardTableEntryClosure* cl, + bool consume = true, + size_t worker_i = 0); + + // Apply the closure to all elements of "buf", down to "index" + // (inclusive.) If returns "false", then a closure application returned + // "false", and we return immediately. If "consume" is true, entries are + // set to NULL as they are processed, so they will not be processed again + // later. + static bool apply_closure_to_buffer(CardTableEntryClosure* cl, + void** buf, size_t index, size_t sz, + bool consume = true, + int worker_i = 0); + void **get_buf() { return _buf;} + void set_buf(void **buf) {_buf = buf;} + size_t get_index() { return _index;} + void reinitialize() { _buf = 0; _sz = 0; _index = 0;} +}; + + + +class DirtyCardQueueSet: public PtrQueueSet { + CardTableEntryClosure* _closure; + + DirtyCardQueue _shared_dirty_card_queue; + + // Override. + bool mut_process_buffer(void** buf); + + // Protected by the _cbl_mon. + FreeIdSet* _free_ids; + + // The number of completed buffers processed by mutator and rs thread, + // respectively. + jint _processed_buffers_mut; + jint _processed_buffers_rs_thread; + +public: + DirtyCardQueueSet(); + + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0, + Mutex* lock = NULL); + + // The number of parallel ids that can be claimed to allow collector or + // mutator threads to do card-processing work. + static size_t num_par_ids(); + + static void handle_zero_index_for_thread(JavaThread* t); + + // Register "blk" as "the closure" for all queues. Only one such closure + // is allowed. The "apply_closure_to_completed_buffer" method will apply + // this closure to a completed buffer, and "iterate_closure_all_threads" + // applies it to partially-filled buffers (the latter should only be done + // with the world stopped). 
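The closure protocol described in the comments above works as follows: one registered closure receives every logged card pointer through do_card_ptr(), a false return stops the walk, and consumed slots are nulled so a partially processed buffer can safely be re-offered later. A simplified, self-contained version of that protocol (plain element indexing rather than PtrQueue's byte indexing; the types below are stand-ins, not the real CardTableEntryClosure or DirtyCardQueue):

#include <cstddef>

typedef signed char jbyte_t;                 // models jbyte

struct CardClosure {                         // models CardTableEntryClosure
  virtual ~CardClosure() {}
  // Return false to terminate the buffer walk early.
  virtual bool do_card_ptr(jbyte_t* card_ptr, int worker_i) = 0;
};

struct CountDirtyCards : public CardClosure {
  size_t count;
  CountDirtyCards() : count(0) {}
  virtual bool do_card_ptr(jbyte_t* /*card_ptr*/, int /*worker_i*/) {
    ++count;           // a real closure would refine remembered sets here
    return true;       // keep iterating
  }
};

// Mirrors apply_closure_to_buffer(): consumed entries are nulled so the same
// buffer can be reconsidered later without processing a card twice.
bool apply_to_buffer(CardClosure* cl, jbyte_t** buf, size_t index, size_t sz,
                     bool consume, int worker_i) {
  if (cl == NULL) return true;
  for (size_t i = index; i < sz; ++i) {
    jbyte_t* card = buf[i];
    if (card != NULL) {
      if (consume) buf[i] = NULL;
      if (!cl->do_card_ptr(card, worker_i)) return false;
    }
  }
  return true;
}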
+ void set_closure(CardTableEntryClosure* closure); + + // If there is a registered closure for buffers, apply it to all entries + // in all currently-active buffers. This should only be applied at a + // safepoint. (Currently must not be called in parallel; this should + // change in the future.) If "consume" is true, processed entries are + // discarded. + void iterate_closure_all_threads(bool consume = true, + size_t worker_i = 0); + + // If there exists some completed buffer, pop it, then apply the + // registered closure to all its elements, nulling out those elements + // processed. If all elements are processed, returns "true". If no + // completed buffers exist, returns false. If a completed buffer exists, + // but is only partially completed before a "yield" happens, the + // partially completed buffer (with its processed elements set to NULL) + // is returned to the completed buffer set, and this call returns false. + bool apply_closure_to_completed_buffer(int worker_i = 0, + int stop_at = 0, + bool with_CAS = false); + bool apply_closure_to_completed_buffer_helper(int worker_i, + CompletedBufferNode* nd); + + CompletedBufferNode* get_completed_buffer_CAS(); + CompletedBufferNode* get_completed_buffer_lock(int stop_at); + // Applies the current closure to all completed buffers, + // non-consumptively. + void apply_closure_to_all_completed_buffers(); + + DirtyCardQueue* shared_dirty_card_queue() { + return &_shared_dirty_card_queue; + } + + // If a full collection is happening, reset partial logs, and ignore + // completed ones: the full collection will make them all irrelevant. + void abandon_logs(); + + // If any threads have partial logs, add them to the global list of logs. + void concatenate_logs(); + void clear_n_completed_buffers() { _n_completed_buffers = 0;} + + jint processed_buffers_mut() { + return _processed_buffers_mut; + } + jint processed_buffers_rs_thread() { + return _processed_buffers_rs_thread; + } + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,624 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1BlockOffsetTable.cpp.incl" + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetSharedArray +////////////////////////////////////////////////////////////////////// + +G1BlockOffsetSharedArray::G1BlockOffsetSharedArray(MemRegion reserved, + size_t init_word_size) : + _reserved(reserved), _end(NULL) +{ + size_t size = compute_size(reserved.word_size()); + ReservedSpace rs(ReservedSpace::allocation_align_size_up(size)); + if (!rs.is_reserved()) { + vm_exit_during_initialization("Could not reserve enough space for heap offset array"); + } + if (!_vs.initialize(rs, 0)) { + vm_exit_during_initialization("Could not reserve enough space for heap offset array"); + } + _offset_array = (u_char*)_vs.low_boundary(); + resize(init_word_size); + if (TraceBlockOffsetTable) { + gclog_or_tty->print_cr("G1BlockOffsetSharedArray::G1BlockOffsetSharedArray: "); + gclog_or_tty->print_cr(" " + " rs.base(): " INTPTR_FORMAT + " rs.size(): " INTPTR_FORMAT + " rs end(): " INTPTR_FORMAT, + rs.base(), rs.size(), rs.base() + rs.size()); + gclog_or_tty->print_cr(" " + " _vs.low_boundary(): " INTPTR_FORMAT + " _vs.high_boundary(): " INTPTR_FORMAT, + _vs.low_boundary(), + _vs.high_boundary()); + } +} + +void G1BlockOffsetSharedArray::resize(size_t new_word_size) { + assert(new_word_size <= _reserved.word_size(), "Resize larger than reserved"); + size_t new_size = compute_size(new_word_size); + size_t old_size = _vs.committed_size(); + size_t delta; + char* high = _vs.high(); + _end = _reserved.start() + new_word_size; + if (new_size > old_size) { + delta = ReservedSpace::page_align_size_up(new_size - old_size); + assert(delta > 0, "just checking"); + if (!_vs.expand_by(delta)) { + // Do better than this for Merlin + vm_exit_out_of_memory(delta, "offset table expansion"); + } + assert(_vs.high() == high + delta, "invalid expansion"); + // Initialization of the contents is left to the + // G1BlockOffsetArray that uses it. 
+ } else { + delta = ReservedSpace::page_align_size_down(old_size - new_size); + if (delta == 0) return; + _vs.shrink_by(delta); + assert(_vs.high() == high - delta, "invalid expansion"); + } +} + +bool G1BlockOffsetSharedArray::is_card_boundary(HeapWord* p) const { + assert(p >= _reserved.start(), "just checking"); + size_t delta = pointer_delta(p, _reserved.start()); + return (delta & right_n_bits(LogN_words)) == (size_t)NoBits; +} + + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetArray +////////////////////////////////////////////////////////////////////// + +G1BlockOffsetArray::G1BlockOffsetArray(G1BlockOffsetSharedArray* array, + MemRegion mr, bool init_to_zero) : + G1BlockOffsetTable(mr.start(), mr.end()), + _unallocated_block(_bottom), + _array(array), _csp(NULL), + _init_to_zero(init_to_zero) { + assert(_bottom <= _end, "arguments out of order"); + if (!_init_to_zero) { + // initialize cards to point back to mr.start() + set_remainder_to_point_to_start(mr.start() + N_words, mr.end()); + _array->set_offset_array(0, 0); // set first card to 0 + } +} + +void G1BlockOffsetArray::set_space(Space* sp) { + _sp = sp; + _csp = sp->toContiguousSpace(); +} + +// The arguments follow the normal convention of denoting +// a right-open interval: [start, end) +void +G1BlockOffsetArray:: set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) { + + if (start >= end) { + // The start address is equal to the end address (or to + // the right of the end address) so there are not cards + // that need to be updated.. + return; + } + + // Write the backskip value for each region. + // + // offset + // card 2nd 3rd + // | +- 1st | | + // v v v v + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+- + // |x|0|0|0|0|0|0|0|1|1|1|1|1|1| ... |1|1|1|1|2|2|2|2|2|2| ... + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+- + // 11 19 75 + // 12 + // + // offset card is the card that points to the start of an object + // x - offset value of offset card + // 1st - start of first logarithmic region + // 0 corresponds to logarithmic value N_words + 0 and 2**(3 * 0) = 1 + // 2nd - start of second logarithmic region + // 1 corresponds to logarithmic value N_words + 1 and 2**(3 * 1) = 8 + // 3rd - start of third logarithmic region + // 2 corresponds to logarithmic value N_words + 2 and 2**(3 * 2) = 64 + // + // integer below the block offset entry is an example of + // the index of the entry + // + // Given an address, + // Find the index for the address + // Find the block offset table entry + // Convert the entry to a back slide + // (e.g., with today's, offset = 0x81 => + // back slip = 2**(3*(0x81 - N_words)) = 2**3) = 8 + // Move back N (e.g., 8) entries and repeat with the + // value of the new entry + // + size_t start_card = _array->index_for(start); + size_t end_card = _array->index_for(end-1); + assert(start ==_array->address_for_index(start_card), "Precondition"); + assert(end ==_array->address_for_index(end_card)+N_words, "Precondition"); + set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval +} + +// Unlike the normal convention in this code, the argument here denotes +// a closed, inclusive interval: [start_card, end_card], cf set_remainder_to_point_to_start() +// above. 
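The diagram above encodes, for every card after the one holding a real offset, how far back a lookup must skip: entries below N_words are direct word offsets, and an entry of N_words + i means "skip back 2^(3*i) cards and look again". A small decoding model of that scheme; the constants (N_words = 64, base 8) follow the diagram and are assumptions rather than the shared BlockOffsetArray constants.

#include <cstddef>

const size_t kNWords = 64;                   // assumed N_words (512-byte cards)

// entry_to_cards_back analogue: valid only for entries >= kNWords.
size_t cards_back(unsigned char entry) {
  size_t i = entry - kNWords;                // which logarithmic region
  return size_t(1) << (3 * i);               // 8^i cards per skip
}

// Follow the chain of back-skips until a card holding a direct offset
// (entry < kNWords) is reached; that card records the block start.
size_t find_offset_card(const unsigned char* offsets, size_t card) {
  while (offsets[card] >= kNWords) {
    card -= cards_back(offsets[card]);
  }
  return card;
}

Because the skip distance grows geometrically with distance from the offset card, even a block spanning a great many cards is resolved in a handful of probes.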
+void +G1BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) { + if (start_card > end_card) { + return; + } + assert(start_card > _array->index_for(_bottom), "Cannot be first card"); + assert(_array->offset_array(start_card-1) <= N_words, + "Offset card has an unexpected value"); + size_t start_card_for_region = start_card; + u_char offset = max_jubyte; + for (int i = 0; i < BlockOffsetArray::N_powers; i++) { + // -1 so that the the card with the actual offset is counted. Another -1 + // so that the reach ends in this region and not at the start + // of the next. + size_t reach = start_card - 1 + (BlockOffsetArray::power_to_cards_back(i+1) - 1); + offset = N_words + i; + if (reach >= end_card) { + _array->set_offset_array(start_card_for_region, end_card, offset); + start_card_for_region = reach + 1; + break; + } + _array->set_offset_array(start_card_for_region, reach, offset); + start_card_for_region = reach + 1; + } + assert(start_card_for_region > end_card, "Sanity check"); + DEBUG_ONLY(check_all_cards(start_card, end_card);) +} + +// The block [blk_start, blk_end) has been allocated; +// adjust the block offset table to represent this information; +// right-open interval: [blk_start, blk_end) +void +G1BlockOffsetArray::alloc_block(HeapWord* blk_start, HeapWord* blk_end) { + mark_block(blk_start, blk_end); + allocated(blk_start, blk_end); +} + +// Adjust BOT to show that a previously whole block has been split +// into two. +void G1BlockOffsetArray::split_block(HeapWord* blk, size_t blk_size, + size_t left_blk_size) { + // Verify that the BOT shows [blk, blk + blk_size) to be one block. + verify_single_block(blk, blk_size); + // Update the BOT to indicate that [blk + left_blk_size, blk + blk_size) + // is one single block. + mark_block(blk + left_blk_size, blk + blk_size); +} + + +// Action_mark - update the BOT for the block [blk_start, blk_end). +// Current typical use is for splitting a block. +// Action_single - udpate the BOT for an allocation. +// Action_verify - BOT verification. +void G1BlockOffsetArray::do_block_internal(HeapWord* blk_start, + HeapWord* blk_end, + Action action) { + assert(Universe::heap()->is_in_reserved(blk_start), + "reference must be into the heap"); + assert(Universe::heap()->is_in_reserved(blk_end-1), + "limit must be within the heap"); + // This is optimized to make the test fast, assuming we only rarely + // cross boundaries. 
+ uintptr_t end_ui = (uintptr_t)(blk_end - 1); + uintptr_t start_ui = (uintptr_t)blk_start; + // Calculate the last card boundary preceding end of blk + intptr_t boundary_before_end = (intptr_t)end_ui; + clear_bits(boundary_before_end, right_n_bits(LogN)); + if (start_ui <= (uintptr_t)boundary_before_end) { + // blk starts at or crosses a boundary + // Calculate index of card on which blk begins + size_t start_index = _array->index_for(blk_start); + // Index of card on which blk ends + size_t end_index = _array->index_for(blk_end - 1); + // Start address of card on which blk begins + HeapWord* boundary = _array->address_for_index(start_index); + assert(boundary <= blk_start, "blk should start at or after boundary"); + if (blk_start != boundary) { + // blk starts strictly after boundary + // adjust card boundary and start_index forward to next card + boundary += N_words; + start_index++; + } + assert(start_index <= end_index, "monotonicity of index_for()"); + assert(boundary <= (HeapWord*)boundary_before_end, "tautology"); + switch (action) { + case Action_mark: { + if (init_to_zero()) { + _array->set_offset_array(start_index, boundary, blk_start); + break; + } // Else fall through to the next case + } + case Action_single: { + _array->set_offset_array(start_index, boundary, blk_start); + // We have finished marking the "offset card". We need to now + // mark the subsequent cards that this blk spans. + if (start_index < end_index) { + HeapWord* rem_st = _array->address_for_index(start_index) + N_words; + HeapWord* rem_end = _array->address_for_index(end_index) + N_words; + set_remainder_to_point_to_start(rem_st, rem_end); + } + break; + } + case Action_check: { + _array->check_offset_array(start_index, boundary, blk_start); + // We have finished checking the "offset card". We need to now + // check the subsequent cards that this blk spans. + check_all_cards(start_index + 1, end_index); + break; + } + default: + ShouldNotReachHere(); + } + } +} + +// The card-interval [start_card, end_card] is a closed interval; this +// is an expensive check -- use with care and only under protection of +// suitable flag. +void G1BlockOffsetArray::check_all_cards(size_t start_card, size_t end_card) const { + + if (end_card < start_card) { + return; + } + guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card"); + for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) { + u_char entry = _array->offset_array(c); + if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) { + guarantee(entry > N_words, "Should be in logarithmic region"); + } + size_t backskip = BlockOffsetArray::entry_to_cards_back(entry); + size_t landing_card = c - backskip; + guarantee(landing_card >= (start_card - 1), "Inv"); + if (landing_card >= start_card) { + guarantee(_array->offset_array(landing_card) <= entry, "monotonicity"); + } else { + guarantee(landing_card == start_card - 1, "Tautology"); + guarantee(_array->offset_array(landing_card) <= N_words, "Offset value"); + } + } +} + +// The range [blk_start, blk_end) represents a single contiguous block +// of storage; modify the block offset table to represent this +// information; Right-open interval: [blk_start, blk_end) +// NOTE: this method does _not_ adjust _unallocated_block. 
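The fast-path test at the top of do_block_internal() above aligns the block's last byte down to a card boundary and compares it with the block start: only a block that starts at or crosses a boundary needs any table update. The same test in isolation, under an assumed LogN of 9 (512-byte cards, matching the constant declared later in G1BlockOffsetSharedArray):

#include <stdint.h>

const uintptr_t kLogN = 9;                            // assumed card shift
const uintptr_t kCardMask = ((uintptr_t)1 << kLogN) - 1;

bool block_needs_bot_update(const void* blk_start, const void* blk_end) {
  uintptr_t start_ui = (uintptr_t)blk_start;
  uintptr_t end_ui   = (uintptr_t)blk_end - 1;        // last byte of the block
  uintptr_t boundary = end_ui & ~kCardMask;           // last card boundary <= end
  return start_ui <= boundary;                        // starts at or crosses it
}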
+void +G1BlockOffsetArray::single_block(HeapWord* blk_start, HeapWord* blk_end) { + do_block_internal(blk_start, blk_end, Action_single); +} + +// Mark the BOT such that if [blk_start, blk_end) straddles a card +// boundary, the card following the first such boundary is marked +// with the appropriate offset. +// NOTE: this method does _not_ adjust _unallocated_block or +// any cards subsequent to the first one. +void +G1BlockOffsetArray::mark_block(HeapWord* blk_start, HeapWord* blk_end) { + do_block_internal(blk_start, blk_end, Action_mark); +} + +void G1BlockOffsetArray::join_blocks(HeapWord* blk1, HeapWord* blk2) { + HeapWord* blk1_start = Universe::heap()->block_start(blk1); + HeapWord* blk2_start = Universe::heap()->block_start(blk2); + assert(blk1 == blk1_start && blk2 == blk2_start, + "Must be block starts."); + assert(blk1 + _sp->block_size(blk1) == blk2, "Must be contiguous."); + size_t blk1_start_index = _array->index_for(blk1); + size_t blk2_start_index = _array->index_for(blk2); + assert(blk1_start_index <= blk2_start_index, "sanity"); + HeapWord* blk2_card_start = _array->address_for_index(blk2_start_index); + if (blk2 == blk2_card_start) { + // blk2 starts a card. Does blk1 start on the prevous card, or futher + // back? + assert(blk1_start_index < blk2_start_index, "must be lower card."); + if (blk1_start_index + 1 == blk2_start_index) { + // previous card; new value for blk2 card is size of blk1. + _array->set_offset_array(blk2_start_index, (u_char) _sp->block_size(blk1)); + } else { + // Earlier card; go back a card. + _array->set_offset_array(blk2_start_index, N_words); + } + } else { + // blk2 does not start a card. Does it cross a card? If not, nothing + // to do. + size_t blk2_end_index = + _array->index_for(blk2 + _sp->block_size(blk2) - 1); + assert(blk2_end_index >= blk2_start_index, "sanity"); + if (blk2_end_index > blk2_start_index) { + // Yes, it crosses a card. The value for the next card must change. + if (blk1_start_index + 1 == blk2_start_index) { + // previous card; new value for second blk2 card is size of blk1. + _array->set_offset_array(blk2_start_index + 1, + (u_char) _sp->block_size(blk1)); + } else { + // Earlier card; go back a card. + _array->set_offset_array(blk2_start_index + 1, N_words); + } + } + } +} + +HeapWord* G1BlockOffsetArray::block_start_unsafe(const void* addr) { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + // Otherwise, find the block start using the table. + HeapWord* q = block_at_or_preceding(addr, false, 0); + return forward_to_block_containing_addr(q, addr); +} + +// This duplicates a little code from the above: unavoidable. +HeapWord* +G1BlockOffsetArray::block_start_unsafe_const(const void* addr) const { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + // Otherwise, find the block start using the table. 
+ HeapWord* q = block_at_or_preceding(addr, false, 0); + HeapWord* n = q + _sp->block_size(q); + return forward_to_block_containing_addr_const(q, n, addr); +} + + +HeapWord* +G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q, + HeapWord* n, + const void* addr) { + // We're not in the normal case. We need to handle an important subcase + // here: LAB allocation. An allocation previously recorded in the + // offset table was actually a lab allocation, and was divided into + // several objects subsequently. Fix this situation as we answer the + // query, by updating entries as we cross them. + size_t next_index = _array->index_for(n) + 1; + HeapWord* next_boundary = _array->address_for_index(next_index); + if (csp() != NULL) { + if (addr >= csp()->top()) return csp()->top(); + while (next_boundary < addr) { + while (n <= next_boundary) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += obj->size(); + } + assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); + // [q, n) is the block that crosses the boundary. + alloc_block_work2(&next_boundary, &next_index, q, n); + } + } else { + while (next_boundary < addr) { + while (n <= next_boundary) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += _sp->block_size(q); + } + assert(q <= next_boundary && n > next_boundary, "Consequence of loop"); + // [q, n) is the block that crosses the boundary. + alloc_block_work2(&next_boundary, &next_index, q, n); + } + } + return forward_to_block_containing_addr_const(q, n, addr); +} + +HeapWord* G1BlockOffsetArray::block_start_careful(const void* addr) const { + assert(_array->offset_array(0) == 0, "objects can't cross covered areas"); + + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + // Must read this exactly once because it can be modified by parallel + // allocation. + HeapWord* ub = _unallocated_block; + if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) { + assert(ub < _end, "tautology (see above)"); + return ub; + } + + // Otherwise, find the block start using the table, but taking + // care (cf block_start_unsafe() above) not to parse any objects/blocks + // on the cards themsleves. + size_t index = _array->index_for(addr); + assert(_array->address_for_index(index) == addr, + "arg should be start of card"); + + HeapWord* q = (HeapWord*)addr; + uint offset; + do { + offset = _array->offset_array(index--); + q -= offset; + } while (offset == N_words); + assert(q <= addr, "block start should be to left of arg"); + return q; +} + +// Note that the committed size of the covered space may have changed, +// so the table size might also wish to change. 
+void G1BlockOffsetArray::resize(size_t new_word_size) { + HeapWord* new_end = _bottom + new_word_size; + if (_end < new_end && !init_to_zero()) { + // verify that the old and new boundaries are also card boundaries + assert(_array->is_card_boundary(_end), + "_end not a card boundary"); + assert(_array->is_card_boundary(new_end), + "new _end would not be a card boundary"); + // set all the newly added cards + _array->set_offset_array(_end, new_end, N_words); + } + _end = new_end; // update _end +} + +void G1BlockOffsetArray::set_region(MemRegion mr) { + _bottom = mr.start(); + _end = mr.end(); +} + +// +// threshold_ +// | _index_ +// v v +// +-------+-------+-------+-------+-------+ +// | i-1 | i | i+1 | i+2 | i+3 | +// +-------+-------+-------+-------+-------+ +// ( ^ ] +// block-start +// +void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_, + HeapWord* blk_start, HeapWord* blk_end) { + // For efficiency, do copy-in/copy-out. + HeapWord* threshold = *threshold_; + size_t index = *index_; + + assert(blk_start != NULL && blk_end > blk_start, + "phantom block"); + assert(blk_end > threshold, "should be past threshold"); + assert(blk_start <= threshold, "blk_start should be at or before threshold") + assert(pointer_delta(threshold, blk_start) <= N_words, + "offset should be <= BlockOffsetSharedArray::N"); + assert(Universe::heap()->is_in_reserved(blk_start), + "reference must be into the heap"); + assert(Universe::heap()->is_in_reserved(blk_end-1), + "limit must be within the heap"); + assert(threshold == _array->_reserved.start() + index*N_words, + "index must agree with threshold"); + + DEBUG_ONLY(size_t orig_index = index;) + + // Mark the card that holds the offset into the block. Note + // that _next_offset_index and _next_offset_threshold are not + // updated until the end of this method. + _array->set_offset_array(index, threshold, blk_start); + + // We need to now mark the subsequent cards that this blk spans. + + // Index of card on which blk ends. + size_t end_index = _array->index_for(blk_end - 1); + + // Are there more cards left to be updated? + if (index + 1 <= end_index) { + HeapWord* rem_st = _array->address_for_index(index + 1); + // Calculate rem_end this way because end_index + // may be the last valid index in the covered region. + HeapWord* rem_end = _array->address_for_index(end_index) + N_words; + set_remainder_to_point_to_start(rem_st, rem_end); + } + + index = end_index + 1; + // Calculate threshold_ this way because end_index + // may be the last valid index in the covered region. + threshold = _array->address_for_index(end_index) + N_words; + assert(threshold >= blk_end, "Incorrect offset threshold"); + + // index_ and threshold_ updated here. + *threshold_ = threshold; + *index_ = index; + +#ifdef ASSERT + // The offset can be 0 if the block starts on a boundary. That + // is checked by an assertion above. 
+ size_t start_index = _array->index_for(blk_start); + HeapWord* boundary = _array->address_for_index(start_index); + assert((_array->offset_array(orig_index) == 0 && + blk_start == boundary) || + (_array->offset_array(orig_index) > 0 && + _array->offset_array(orig_index) <= N_words), + "offset array should have been set"); + for (size_t j = orig_index + 1; j <= end_index; j++) { + assert(_array->offset_array(j) > 0 && + _array->offset_array(j) <= + (u_char) (N_words+BlockOffsetArray::N_powers-1), + "offset array should have been set"); + } +#endif +} + +////////////////////////////////////////////////////////////////////// +// G1BlockOffsetArrayContigSpace +////////////////////////////////////////////////////////////////////// + +HeapWord* +G1BlockOffsetArrayContigSpace::block_start_unsafe(const void* addr) { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1); + return forward_to_block_containing_addr(q, addr); +} + +HeapWord* +G1BlockOffsetArrayContigSpace:: +block_start_unsafe_const(const void* addr) const { + assert(_bottom <= addr && addr < _end, + "addr must be covered by this Array"); + HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1); + HeapWord* n = q + _sp->block_size(q); + return forward_to_block_containing_addr_const(q, n, addr); +} + +G1BlockOffsetArrayContigSpace:: +G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, + MemRegion mr) : + G1BlockOffsetArray(array, mr, true) +{ + _next_offset_threshold = NULL; + _next_offset_index = 0; +} + +HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() { + assert(!Universe::heap()->is_in_reserved(_array->_offset_array), + "just checking"); + _next_offset_index = _array->index_for(_bottom); + _next_offset_index++; + _next_offset_threshold = + _array->address_for_index(_next_offset_index); + return _next_offset_threshold; +} + +void G1BlockOffsetArrayContigSpace::zero_bottom_entry() { + assert(!Universe::heap()->is_in_reserved(_array->_offset_array), + "just checking"); + size_t bottom_index = _array->index_for(_bottom); + assert(_array->address_for_index(bottom_index) == _bottom, + "Precondition of call"); + _array->set_offset_array(bottom_index, 0); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,487 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The CollectedHeap type requires subtypes to implement a method +// "block_start". For some subtypes, notably generational +// systems using card-table-based write barriers, the efficiency of this +// operation may be important. Implementations of the "BlockOffsetArray" +// class may be useful in providing such efficient implementations. +// +// While generally mirroring the structure of the BOT for GenCollectedHeap, +// the following types are tailored more towards G1's uses; these should, +// however, be merged back into a common BOT to avoid code duplication +// and reduce maintenance overhead. +// +// G1BlockOffsetTable (abstract) +// -- G1BlockOffsetArray (uses G1BlockOffsetSharedArray) +// -- G1BlockOffsetArrayContigSpace +// +// A main impediment to the consolidation of this code might be the +// effect of making some of the block_start*() calls non-const as +// below. Whether that might adversely affect performance optimizations +// that compilers might normally perform in the case of non-G1 +// collectors needs to be carefully investigated prior to any such +// consolidation. + +// Forward declarations +class ContiguousSpace; +class G1BlockOffsetSharedArray; + +class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC { + friend class VMStructs; +protected: + // These members describe the region covered by the table. + + // The space this table is covering. + HeapWord* _bottom; // == reserved.start + HeapWord* _end; // End of currently allocated region. + +public: + // Initialize the table to cover the given space. + // The contents of the initial table are undefined. + G1BlockOffsetTable(HeapWord* bottom, HeapWord* end) : + _bottom(bottom), _end(end) + { + assert(_bottom <= _end, "arguments out of order"); + } + + // Note that the committed size of the covered space may have changed, + // so the table size might also wish to change. + virtual void resize(size_t new_word_size) = 0; + + virtual void set_bottom(HeapWord* new_bottom) { + assert(new_bottom <= _end, "new_bottom > _end"); + _bottom = new_bottom; + resize(pointer_delta(_end, _bottom)); + } + + // Requires "addr" to be contained by a block, and returns the address of + // the start of that block. (May have side effects, namely updating of + // shared array entries that "point" too far backwards. This can occur, + // for example, when LAB allocation is used in a space covered by the + // table.) + virtual HeapWord* block_start_unsafe(const void* addr) = 0; + // Same as above, but does not have any of the possible side effects + // discussed above. + virtual HeapWord* block_start_unsafe_const(const void* addr) const = 0; + + // Returns the address of the start of the block containing "addr", or + // else "null" if it is covered by no block. (May have side effects, + // namely updating of shared array entries that "point" too far + // backwards. This can occur, for example, when lab allocation is used + // in a space covered by the table.) + inline HeapWord* block_start(const void* addr); + // Same as above, but does not have any of the possible side effects + // discussed above. + inline HeapWord* block_start_const(const void* addr) const; +}; + +// This implementation of "G1BlockOffsetTable" divides the covered region +// into "N"-word subregions (where "N" = 2^"LogN". 
An array with an entry +// for each such subregion indicates how far back one must go to find the +// start of the chunk that includes the first word of the subregion. +// +// Each BlockOffsetArray is owned by a Space. However, the actual array +// may be shared by several BlockOffsetArrays; this is useful +// when a single resizable area (such as a generation) is divided up into +// several spaces in which contiguous allocation takes place, +// such as, for example, in G1 or in the train generation.) + +// Here is the shared array type. + +class G1BlockOffsetSharedArray: public CHeapObj { + friend class G1BlockOffsetArray; + friend class G1BlockOffsetArrayContigSpace; + friend class VMStructs; + +private: + // The reserved region covered by the shared array. + MemRegion _reserved; + + // End of the current committed region. + HeapWord* _end; + + // Array for keeping offsets for retrieving object start fast given an + // address. + VirtualSpace _vs; + u_char* _offset_array; // byte array keeping backwards offsets + + // Bounds checking accessors: + // For performance these have to devolve to array accesses in product builds. + u_char offset_array(size_t index) const { + assert(index < _vs.committed_size(), "index out of range"); + return _offset_array[index]; + } + + void set_offset_array(size_t index, u_char offset) { + assert(index < _vs.committed_size(), "index out of range"); + assert(offset <= N_words, "offset too large"); + _offset_array[index] = offset; + } + + void set_offset_array(size_t index, HeapWord* high, HeapWord* low) { + assert(index < _vs.committed_size(), "index out of range"); + assert(high >= low, "addresses out of order"); + assert(pointer_delta(high, low) <= N_words, "offset too large"); + _offset_array[index] = (u_char) pointer_delta(high, low); + } + + void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) { + assert(index_for(right - 1) < _vs.committed_size(), + "right address out of range"); + assert(left < right, "Heap addresses out of order"); + size_t num_cards = pointer_delta(right, left) >> LogN_words; + memset(&_offset_array[index_for(left)], offset, num_cards); + } + + void set_offset_array(size_t left, size_t right, u_char offset) { + assert(right < _vs.committed_size(), "right address out of range"); + assert(left <= right, "indexes out of order"); + size_t num_cards = right - left + 1; + memset(&_offset_array[left], offset, num_cards); + } + + void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const { + assert(index < _vs.committed_size(), "index out of range"); + assert(high >= low, "addresses out of order"); + assert(pointer_delta(high, low) <= N_words, "offset too large"); + assert(_offset_array[index] == pointer_delta(high, low), + "Wrong offset"); + } + + bool is_card_boundary(HeapWord* p) const; + + // Return the number of slots needed for an offset array + // that covers mem_region_words words. + // We always add an extra slot because if an object + // ends on a card boundary we put a 0 in the next + // offset array slot, so we want that slot always + // to be reserved. + + size_t compute_size(size_t mem_region_words) { + size_t number_of_slots = (mem_region_words / N_words) + 1; + return ReservedSpace::page_align_size_up(number_of_slots); + } + +public: + enum SomePublicConstants { + LogN = 9, + LogN_words = LogN - LogHeapWordSize, + N_bytes = 1 << LogN, + N_words = 1 << LogN_words + }; + + // Initialize the table to cover from "base" to (at least) + // "base + init_word_size". 
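compute_size() above works out to one byte of offset table per N_words-word card, plus one spare slot for a block that ends exactly on a card boundary, rounded up to the reservation granularity. A back-of-the-envelope version with assumed values (64-bit HeapWords, 512-byte cards as in the enum above, and a 4 KB page standing in for ReservedSpace's rounding): a 1 GB heap needs roughly 2 MB of table.

#include <cstddef>

const size_t kHeapWordBytes = 8;                 // assumed 64-bit HeapWord
const size_t kNWords = 512 / kHeapWordBytes;     // words per card (N_words)
const size_t kPage = 4096;                       // assumed page size

size_t bot_bytes_for(size_t heap_words) {
  size_t slots = heap_words / kNWords + 1;       // one u_char per card, +1 spare
  return (slots + kPage - 1) / kPage * kPage;    // page-align upward
}

// e.g. a 1 GB heap: bot_bytes_for((1u << 30) / kHeapWordBytes) == 2 MB + 4 KB.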
In the future, the table may be expanded + // (see "resize" below) up to the size of "_reserved" (which must be at + // least "init_word_size".) The contents of the initial table are + // undefined; it is the responsibility of the constituent + // G1BlockOffsetTable(s) to initialize cards. + G1BlockOffsetSharedArray(MemRegion reserved, size_t init_word_size); + + // Notes a change in the committed size of the region covered by the + // table. The "new_word_size" may not be larger than the size of the + // reserved region this table covers. + void resize(size_t new_word_size); + + void set_bottom(HeapWord* new_bottom); + + // Updates all the BlockOffsetArray's sharing this shared array to + // reflect the current "top"'s of their spaces. + void update_offset_arrays(); + + // Return the appropriate index into "_offset_array" for "p". + inline size_t index_for(const void* p) const; + + // Return the address indicating the start of the region corresponding to + // "index" in "_offset_array". + inline HeapWord* address_for_index(size_t index) const; +}; + +// And here is the G1BlockOffsetTable subtype that uses the array. + +class G1BlockOffsetArray: public G1BlockOffsetTable { + friend class G1BlockOffsetSharedArray; + friend class G1BlockOffsetArrayContigSpace; + friend class VMStructs; +private: + enum SomePrivateConstants { + N_words = G1BlockOffsetSharedArray::N_words, + LogN = G1BlockOffsetSharedArray::LogN + }; + + // The following enums are used by do_block_helper + enum Action { + Action_single, // BOT records a single block (see single_block()) + Action_mark, // BOT marks the start of a block (see mark_block()) + Action_check // Check that BOT records block correctly + // (see verify_single_block()). + }; + + // This is the array, which can be shared by several BlockOffsetArray's + // servicing different + G1BlockOffsetSharedArray* _array; + + // The space that owns this subregion. + Space* _sp; + + // If "_sp" is a contiguous space, the field below is the view of "_sp" + // as a contiguous space, else NULL. + ContiguousSpace* _csp; + + // If true, array entries are initialized to 0; otherwise, they are + // initialized to point backwards to the beginning of the covered region. + bool _init_to_zero; + + // The portion [_unallocated_block, _sp.end()) of the space that + // is a single block known not to contain any objects. + // NOTE: See BlockOffsetArrayUseUnallocatedBlock flag. + HeapWord* _unallocated_block; + + // Sets the entries + // corresponding to the cards starting at "start" and ending at "end" + // to point back to the card before "start": the interval [start, end) + // is right-open. + void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end); + // Same as above, except that the args here are a card _index_ interval + // that is closed: [start_index, end_index] + void set_remainder_to_point_to_start_incl(size_t start, size_t end); + + // A helper function for BOT adjustment/verification work + void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action); + +protected: + + ContiguousSpace* csp() const { return _csp; } + + // Returns the address of a block whose start is at most "addr". + // If "has_max_index" is true, "assumes "max_index" is the last valid one + // in the array. + inline HeapWord* block_at_or_preceding(const void* addr, + bool has_max_index, + size_t max_index) const; + + // "q" is a block boundary that is <= "addr"; "n" is the address of the + // next block (or the end of the space.) 
Return the address of the + // beginning of the block that contains "addr". Does so without side + // effects (see, e.g., spec of block_start.) + inline HeapWord* + forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, + const void* addr) const; + + // "q" is a block boundary that is <= "addr"; return the address of the + // beginning of the block that contains "addr". May have side effects + // on "this", by updating imprecise entries. + inline HeapWord* forward_to_block_containing_addr(HeapWord* q, + const void* addr); + + // "q" is a block boundary that is <= "addr"; "n" is the address of the + // next block (or the end of the space.) Return the address of the + // beginning of the block that contains "addr". May have side effects + // on "this", by updating imprecise entries. + HeapWord* forward_to_block_containing_addr_slow(HeapWord* q, + HeapWord* n, + const void* addr); + + // Requires that "*threshold_" be the first array entry boundary at or + // above "blk_start", and that "*index_" be the corresponding array + // index. If the block starts at or crosses "*threshold_", records + // "blk_start" as the appropriate block start for the array index + // starting at "*threshold_", and for any other indices crossed by the + // block. Updates "*threshold_" and "*index_" to correspond to the first + // index after the block end. + void alloc_block_work2(HeapWord** threshold_, size_t* index_, + HeapWord* blk_start, HeapWord* blk_end); + +public: + // The space may not have it's bottom and top set yet, which is why the + // region is passed as a parameter. If "init_to_zero" is true, the + // elements of the array are initialized to zero. Otherwise, they are + // initialized to point backwards to the beginning. + G1BlockOffsetArray(G1BlockOffsetSharedArray* array, MemRegion mr, + bool init_to_zero); + + // Note: this ought to be part of the constructor, but that would require + // "this" to be passed as a parameter to a member constructor for + // the containing concrete subtype of Space. + // This would be legal C++, but MS VC++ doesn't allow it. + void set_space(Space* sp); + + // Resets the covered region to the given "mr". + void set_region(MemRegion mr); + + // Resets the covered region to one with the same _bottom as before but + // the "new_word_size". + void resize(size_t new_word_size); + + // These must be guaranteed to work properly (i.e., do nothing) + // when "blk_start" ("blk" for second version) is "NULL". + virtual void alloc_block(HeapWord* blk_start, HeapWord* blk_end); + virtual void alloc_block(HeapWord* blk, size_t size) { + alloc_block(blk, blk + size); + } + + // The following methods are useful and optimized for a + // general, non-contiguous space. + + // The given arguments are required to be the starts of adjacent ("blk1" + // before "blk2") well-formed blocks covered by "this". After this call, + // they should be considered to form one block. + virtual void join_blocks(HeapWord* blk1, HeapWord* blk2); + + // Given a block [blk_start, blk_start + full_blk_size), and + // a left_blk_size < full_blk_size, adjust the BOT to show two + // blocks [blk_start, blk_start + left_blk_size) and + // [blk_start + left_blk_size, blk_start + full_blk_size). + // It is assumed (and verified in the non-product VM) that the + // BOT was correct for the original block. + void split_block(HeapWord* blk_start, size_t full_blk_size, + size_t left_blk_size); + + // Adjust the BOT to show that it has a single block in the + // range [blk_start, blk_start + size). 
All necessary BOT + // cards are adjusted, but _unallocated_block isn't. + void single_block(HeapWord* blk_start, HeapWord* blk_end); + void single_block(HeapWord* blk, size_t size) { + single_block(blk, blk + size); + } + + // Adjust BOT to show that it has a block in the range + // [blk_start, blk_start + size). Only the first card + // of BOT is touched. It is assumed (and verified in the + // non-product VM) that the remaining cards of the block + // are correct. + void mark_block(HeapWord* blk_start, HeapWord* blk_end); + void mark_block(HeapWord* blk, size_t size) { + mark_block(blk, blk + size); + } + + // Adjust _unallocated_block to indicate that a particular + // block has been newly allocated or freed. It is assumed (and + // verified in the non-product VM) that the BOT is correct for + // the given block. + inline void allocated(HeapWord* blk_start, HeapWord* blk_end) { + // Verify that the BOT shows [blk, blk + blk_size) to be one block. + verify_single_block(blk_start, blk_end); + if (BlockOffsetArrayUseUnallocatedBlock) { + _unallocated_block = MAX2(_unallocated_block, blk_end); + } + } + + inline void allocated(HeapWord* blk, size_t size) { + allocated(blk, blk + size); + } + + inline void freed(HeapWord* blk_start, HeapWord* blk_end); + + inline void freed(HeapWord* blk, size_t size); + + virtual HeapWord* block_start_unsafe(const void* addr); + virtual HeapWord* block_start_unsafe_const(const void* addr) const; + + // Requires "addr" to be the start of a card and returns the + // start of the block that contains the given address. + HeapWord* block_start_careful(const void* addr) const; + + // If true, initialize array slots with no allocated blocks to zero. + // Otherwise, make them point back to the front. + bool init_to_zero() { return _init_to_zero; } + + // Verification & debugging - ensure that the offset table reflects the fact + // that the block [blk_start, blk_end) or [blk, blk + size) is a + // single block of storage. NOTE: can;t const this because of + // call to non-const do_block_internal() below. + inline void verify_single_block(HeapWord* blk_start, HeapWord* blk_end) { + if (VerifyBlockOffsetArray) { + do_block_internal(blk_start, blk_end, Action_check); + } + } + + inline void verify_single_block(HeapWord* blk, size_t size) { + verify_single_block(blk, blk + size); + } + + // Verify that the given block is before _unallocated_block + inline void verify_not_unallocated(HeapWord* blk_start, + HeapWord* blk_end) const { + if (BlockOffsetArrayUseUnallocatedBlock) { + assert(blk_start < blk_end, "Block inconsistency?"); + assert(blk_end <= _unallocated_block, "_unallocated_block problem"); + } + } + + inline void verify_not_unallocated(HeapWord* blk, size_t size) const { + verify_not_unallocated(blk, blk + size); + } + + void check_all_cards(size_t left_card, size_t right_card) const; +}; + +// A subtype of BlockOffsetArray that takes advantage of the fact +// that its underlying space is a ContiguousSpace, so that its "active" +// region can be more efficiently tracked (than for a non-contiguous space). +class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray { + friend class VMStructs; + + // allocation boundary at which offset array must be updated + HeapWord* _next_offset_threshold; + size_t _next_offset_index; // index corresponding to that boundary + + // Work function to be called when allocation start crosses the next + // threshold in the contig space. 
+ void alloc_block_work1(HeapWord* blk_start, HeapWord* blk_end) { + alloc_block_work2(&_next_offset_threshold, &_next_offset_index, + blk_start, blk_end); + } + + + public: + G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr); + + // Initialize the threshold to reflect the first boundary after the + // bottom of the covered region. + HeapWord* initialize_threshold(); + + // Zero out the entry for _bottom (offset will be zero). + void zero_bottom_entry(); + + // Return the next threshold, the point at which the table should be + // updated. + HeapWord* threshold() const { return _next_offset_threshold; } + + // These must be guaranteed to work properly (i.e., do nothing) + // when "blk_start" ("blk" for second version) is "NULL". In this + // implementation, that's true because NULL is represented as 0, and thus + // never exceeds the "_next_offset_threshold". + void alloc_block(HeapWord* blk_start, HeapWord* blk_end) { + if (blk_end > _next_offset_threshold) + alloc_block_work1(blk_start, blk_end); + } + void alloc_block(HeapWord* blk, size_t size) { + alloc_block(blk, blk+size); + } + + HeapWord* block_start_unsafe(const void* addr); + HeapWord* block_start_unsafe_const(const void* addr) const; +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,153 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) { + if (addr >= _bottom && addr < _end) { + return block_start_unsafe(addr); + } else { + return NULL; + } +} + +inline HeapWord* +G1BlockOffsetTable::block_start_const(const void* addr) const { + if (addr >= _bottom && addr < _end) { + return block_start_unsafe_const(addr); + } else { + return NULL; + } +} + +inline size_t G1BlockOffsetSharedArray::index_for(const void* p) const { + char* pc = (char*)p; + assert(pc >= (char*)_reserved.start() && + pc < (char*)_reserved.end(), + "p not in range."); + size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char)); + size_t result = delta >> LogN; + assert(result < _vs.committed_size(), "bad index from address"); + return result; +} + +inline HeapWord* +G1BlockOffsetSharedArray::address_for_index(size_t index) const { + assert(index < _vs.committed_size(), "bad index"); + HeapWord* result = _reserved.start() + (index << LogN_words); + assert(result >= _reserved.start() && result < _reserved.end(), + "bad address from index"); + return result; +} + +inline HeapWord* +G1BlockOffsetArray::block_at_or_preceding(const void* addr, + bool has_max_index, + size_t max_index) const { + assert(_array->offset_array(0) == 0, "objects can't cross covered areas"); + size_t index = _array->index_for(addr); + // We must make sure that the offset table entry we use is valid. If + // "addr" is past the end, start at the last known one and go forward. + if (has_max_index) { + index = MIN2(index, max_index); + } + HeapWord* q = _array->address_for_index(index); + + uint offset = _array->offset_array(index); // Extend u_char to uint. + while (offset >= N_words) { + // The excess of the offset from N_words indicates a power of Base + // to go back by. + size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset); + q -= (N_words * n_cards_back); + assert(q >= _sp->bottom(), "Went below bottom!"); + index -= n_cards_back; + offset = _array->offset_array(index); + } + assert(offset < N_words, "offset too large"); + q -= offset; + return q; +} + +inline HeapWord* +G1BlockOffsetArray:: +forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n, + const void* addr) const { + if (csp() != NULL) { + if (addr >= csp()->top()) return csp()->top(); + while (n <= addr) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += obj->size(); + } + } else { + while (n <= addr) { + q = n; + oop obj = oop(q); + if (obj->klass() == NULL) return q; + n += _sp->block_size(q); + } + } + assert(q <= n, "wrong order for q and addr"); + assert(addr < n, "wrong order for addr and n"); + return q; +} + +inline HeapWord* +G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q, + const void* addr) { + if (oop(q)->klass() == NULL) return q; + HeapWord* n = q + _sp->block_size(q); + // In the normal case, where the query "addr" is a card boundary, and the + // offset table chunks are the same size as cards, the block starting at + // "q" will contain addr, so the test below will fail, and we'll fall + // through quickly. 
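// A simplified, standalone sketch of the forwarding walk performed above and
// in the slow path called just below: starting from a known block start
// q <= addr, step block by block until stepping further would pass addr.
// The names and the block_size callback are hypothetical, not the HotSpot
// interfaces.
#include <cassert>
#include <cstddef>

typedef const char* Addr;

Addr find_block_containing(Addr q, Addr addr,
                           size_t (*block_size)(Addr) /* size of block at q */) {
  assert(q <= addr);                 // precondition: q starts at or before addr
  Addr n = q + block_size(q);        // start of the block after q
  while (n <= addr) {                // addr lies in a later block: advance
    q = n;
    n = q + block_size(q);
  }
  // Here q <= addr < n, so q is the start of the block containing addr.
  return q;
}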
+ if (n <= addr) { + q = forward_to_block_containing_addr_slow(q, n, addr); + } + assert(q <= addr, "wrong order for current and arg"); + return q; +} + +////////////////////////////////////////////////////////////////////////// +// BlockOffsetArrayNonContigSpace inlines +////////////////////////////////////////////////////////////////////////// +inline void G1BlockOffsetArray::freed(HeapWord* blk_start, HeapWord* blk_end) { + // Verify that the BOT shows [blk_start, blk_end) to be one block. + verify_single_block(blk_start, blk_end); + // adjust _unallocated_block upward or downward + // as appropriate + if (BlockOffsetArrayUseUnallocatedBlock) { + assert(_unallocated_block <= _end, + "Inconsistent value for _unallocated_block"); + if (blk_end >= _unallocated_block && blk_start <= _unallocated_block) { + // CMS-specific note: a block abutting _unallocated_block to + // its left is being freed, a new block is being added or + // we are resetting following a compaction + _unallocated_block = blk_start; + } + } +} + +inline void G1BlockOffsetArray::freed(HeapWord* blk, size_t size) { + freed(blk, blk + size); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,5355 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1CollectedHeap.cpp.incl" + +// turn it on so that the contents of the young list (scan-only / +// to-be-collected) are printed at "strategic" points before / during +// / after the collection --- this is useful for debugging +#define SCAN_ONLY_VERBOSE 0 +// CURRENT STATUS +// This file is under construction. Search for "FIXME". + +// INVARIANTS/NOTES +// +// All allocation activity covered by the G1CollectedHeap interface is +// serialized by acquiring the HeapLock. This happens in +// mem_allocate_work, which all such allocation functions call. +// (Note that this does not apply to TLAB allocation, which is not part +// of this interface: it is done by clients of this interface.) + +// Local to this file. + +// Finds the first HeapRegion. +// No longer used, but might be handy someday. 
+ +class FindFirstRegionClosure: public HeapRegionClosure { + HeapRegion* _a_region; +public: + FindFirstRegionClosure() : _a_region(NULL) {} + bool doHeapRegion(HeapRegion* r) { + _a_region = r; + return true; + } + HeapRegion* result() { return _a_region; } +}; + + +class RefineCardTableEntryClosure: public CardTableEntryClosure { + SuspendibleThreadSet* _sts; + G1RemSet* _g1rs; + ConcurrentG1Refine* _cg1r; + bool _concurrent; +public: + RefineCardTableEntryClosure(SuspendibleThreadSet* sts, + G1RemSet* g1rs, + ConcurrentG1Refine* cg1r) : + _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true) + {} + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + _g1rs->concurrentRefineOneCard(card_ptr, worker_i); + if (_concurrent && _sts->should_yield()) { + // Caller will actually yield. + return false; + } + // Otherwise, we finished successfully; return true. + return true; + } + void set_concurrent(bool b) { _concurrent = b; } +}; + + +class ClearLoggedCardTableEntryClosure: public CardTableEntryClosure { + int _calls; + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; + int _histo[256]; +public: + ClearLoggedCardTableEntryClosure() : + _calls(0) + { + _g1h = G1CollectedHeap::heap(); + _ctbs = (CardTableModRefBS*)_g1h->barrier_set(); + for (int i = 0; i < 256; i++) _histo[i] = 0; + } + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) { + _calls++; + unsigned char* ujb = (unsigned char*)card_ptr; + int ind = (int)(*ujb); + _histo[ind]++; + *card_ptr = -1; + } + return true; + } + int calls() { return _calls; } + void print_histo() { + gclog_or_tty->print_cr("Card table value histogram:"); + for (int i = 0; i < 256; i++) { + if (_histo[i] != 0) { + gclog_or_tty->print_cr(" %d: %d", i, _histo[i]); + } + } + } +}; + +class RedirtyLoggedCardTableEntryClosure: public CardTableEntryClosure { + int _calls; + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; +public: + RedirtyLoggedCardTableEntryClosure() : + _calls(0) + { + _g1h = G1CollectedHeap::heap(); + _ctbs = (CardTableModRefBS*)_g1h->barrier_set(); + } + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + if (_g1h->is_in_reserved(_ctbs->addr_for(card_ptr))) { + _calls++; + *card_ptr = 0; + } + return true; + } + int calls() { return _calls; } +}; + +YoungList::YoungList(G1CollectedHeap* g1h) + : _g1h(g1h), _head(NULL), + _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL), + _length(0), _scan_only_length(0), + _last_sampled_rs_lengths(0), + _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0) +{ + guarantee( check_list_empty(false), "just making sure..." 
); +} + +void YoungList::push_region(HeapRegion *hr) { + assert(!hr->is_young(), "should not already be young"); + assert(hr->get_next_young_region() == NULL, "cause it should!"); + + hr->set_next_young_region(_head); + _head = hr; + + hr->set_young(); + double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length); + ++_length; +} + +void YoungList::add_survivor_region(HeapRegion* hr) { + assert(!hr->is_survivor(), "should not already be for survived"); + assert(hr->get_next_young_region() == NULL, "cause it should!"); + + hr->set_next_young_region(_survivor_head); + if (_survivor_head == NULL) { + _survivors_tail = hr; + } + _survivor_head = hr; + + hr->set_survivor(); + ++_survivor_length; +} + +HeapRegion* YoungList::pop_region() { + while (_head != NULL) { + assert( length() > 0, "list should not be empty" ); + HeapRegion* ret = _head; + _head = ret->get_next_young_region(); + ret->set_next_young_region(NULL); + --_length; + assert(ret->is_young(), "region should be very young"); + + // Replace 'Survivor' region type with 'Young'. So the region will + // be treated as a young region and will not be 'confused' with + // newly created survivor regions. + if (ret->is_survivor()) { + ret->set_young(); + } + + if (!ret->is_scan_only()) { + return ret; + } + + // scan-only, we'll add it to the scan-only list + if (_scan_only_tail == NULL) { + guarantee( _scan_only_head == NULL, "invariant" ); + + _scan_only_head = ret; + _curr_scan_only = ret; + } else { + guarantee( _scan_only_head != NULL, "invariant" ); + _scan_only_tail->set_next_young_region(ret); + } + guarantee( ret->get_next_young_region() == NULL, "invariant" ); + _scan_only_tail = ret; + + // no need to be tagged as scan-only any more + ret->set_young(); + + ++_scan_only_length; + } + assert( length() == 0, "list should be empty" ); + return NULL; +} + +void YoungList::empty_list(HeapRegion* list) { + while (list != NULL) { + HeapRegion* next = list->get_next_young_region(); + list->set_next_young_region(NULL); + list->uninstall_surv_rate_group(); + list->set_not_young(); + list = next; + } +} + +void YoungList::empty_list() { + assert(check_list_well_formed(), "young list should be well formed"); + + empty_list(_head); + _head = NULL; + _length = 0; + + empty_list(_scan_only_head); + _scan_only_head = NULL; + _scan_only_tail = NULL; + _scan_only_length = 0; + _curr_scan_only = NULL; + + empty_list(_survivor_head); + _survivor_head = NULL; + _survivors_tail = NULL; + _survivor_length = 0; + + _last_sampled_rs_lengths = 0; + + assert(check_list_empty(false), "just making sure..."); +} + +bool YoungList::check_list_well_formed() { + bool ret = true; + + size_t length = 0; + HeapRegion* curr = _head; + HeapRegion* last = NULL; + while (curr != NULL) { + if (!curr->is_young() || curr->is_scan_only()) { + gclog_or_tty->print_cr("### YOUNG REGION "PTR_FORMAT"-"PTR_FORMAT" " + "incorrectly tagged (%d, %d)", + curr->bottom(), curr->end(), + curr->is_young(), curr->is_scan_only()); + ret = false; + } + ++length; + last = curr; + curr = curr->get_next_young_region(); + } + ret = ret && (length == _length); + + if (!ret) { + gclog_or_tty->print_cr("### YOUNG LIST seems not well formed!"); + gclog_or_tty->print_cr("### list has %d entries, _length is %d", + length, _length); + } + + bool scan_only_ret = true; + length = 0; + curr = _scan_only_head; + last = NULL; + while (curr != NULL) { + if (!curr->is_young() || curr->is_scan_only()) { + gclog_or_tty->print_cr("### SCAN-ONLY REGION "PTR_FORMAT"-"PTR_FORMAT" " + "incorrectly 
tagged (%d, %d)", + curr->bottom(), curr->end(), + curr->is_young(), curr->is_scan_only()); + scan_only_ret = false; + } + ++length; + last = curr; + curr = curr->get_next_young_region(); + } + scan_only_ret = scan_only_ret && (length == _scan_only_length); + + if ( (last != _scan_only_tail) || + (_scan_only_head == NULL && _scan_only_tail != NULL) || + (_scan_only_head != NULL && _scan_only_tail == NULL) ) { + gclog_or_tty->print_cr("## _scan_only_tail is set incorrectly"); + scan_only_ret = false; + } + + if (_curr_scan_only != NULL && _curr_scan_only != _scan_only_head) { + gclog_or_tty->print_cr("### _curr_scan_only is set incorrectly"); + scan_only_ret = false; + } + + if (!scan_only_ret) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST seems not well formed!"); + gclog_or_tty->print_cr("### list has %d entries, _scan_only_length is %d", + length, _scan_only_length); + } + + return ret && scan_only_ret; +} + +bool YoungList::check_list_empty(bool ignore_scan_only_list, + bool check_sample) { + bool ret = true; + + if (_length != 0) { + gclog_or_tty->print_cr("### YOUNG LIST should have 0 length, not %d", + _length); + ret = false; + } + if (check_sample && _last_sampled_rs_lengths != 0) { + gclog_or_tty->print_cr("### YOUNG LIST has non-zero last sampled RS lengths"); + ret = false; + } + if (_head != NULL) { + gclog_or_tty->print_cr("### YOUNG LIST does not have a NULL head"); + ret = false; + } + if (!ret) { + gclog_or_tty->print_cr("### YOUNG LIST does not seem empty"); + } + + if (ignore_scan_only_list) + return ret; + + bool scan_only_ret = true; + if (_scan_only_length != 0) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST should have 0 length, not %d", + _scan_only_length); + scan_only_ret = false; + } + if (_scan_only_head != NULL) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL head"); + scan_only_ret = false; + } + if (_scan_only_tail != NULL) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST does not have a NULL tail"); + scan_only_ret = false; + } + if (!scan_only_ret) { + gclog_or_tty->print_cr("### SCAN-ONLY LIST does not seem empty"); + } + + return ret && scan_only_ret; +} + +void +YoungList::rs_length_sampling_init() { + _sampled_rs_lengths = 0; + _curr = _head; +} + +bool +YoungList::rs_length_sampling_more() { + return _curr != NULL; +} + +void +YoungList::rs_length_sampling_next() { + assert( _curr != NULL, "invariant" ); + _sampled_rs_lengths += _curr->rem_set()->occupied(); + _curr = _curr->get_next_young_region(); + if (_curr == NULL) { + _last_sampled_rs_lengths = _sampled_rs_lengths; + // gclog_or_tty->print_cr("last sampled RS lengths = %d", _last_sampled_rs_lengths); + } +} + +void +YoungList::reset_auxilary_lists() { + // We could have just "moved" the scan-only list to the young list. + // However, the scan-only list is ordered according to the region + // age in descending order, so, by moving one entry at a time, we + // ensure that it is recreated in ascending order. + + guarantee( is_empty(), "young list should be empty" ); + assert(check_list_well_formed(), "young list should be well formed"); + + // Add survivor regions to SurvRateGroup. 
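// A simplified, standalone sketch of the relinking that follows: the survivor
// list is placed at the front of the young list and the scan-only list is
// appended behind it, purely by rewiring the intrusive "next" pointers, never
// by copying nodes.  Node and next are hypothetical stand-ins for HeapRegion
// and its next-young-region link.
#include <cstddef>

struct Node { Node* next; };

// Head of the combined list "first followed by second"; either may be empty.
Node* splice(Node* first_head, Node* first_tail, Node* second_head) {
  if (first_head == NULL) {
    return second_head;              // nothing to put in front
  }
  first_tail->next = second_head;    // hook the second list onto the tail
  return first_head;
}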
+ _g1h->g1_policy()->note_start_adding_survivor_regions(); + for (HeapRegion* curr = _survivor_head; + curr != NULL; + curr = curr->get_next_young_region()) { + _g1h->g1_policy()->set_region_survivors(curr); + } + _g1h->g1_policy()->note_stop_adding_survivor_regions(); + + if (_survivor_head != NULL) { + _head = _survivor_head; + _length = _survivor_length + _scan_only_length; + _survivors_tail->set_next_young_region(_scan_only_head); + } else { + _head = _scan_only_head; + _length = _scan_only_length; + } + + for (HeapRegion* curr = _scan_only_head; + curr != NULL; + curr = curr->get_next_young_region()) { + curr->recalculate_age_in_surv_rate_group(); + } + _scan_only_head = NULL; + _scan_only_tail = NULL; + _scan_only_length = 0; + _curr_scan_only = NULL; + + _survivor_head = NULL; + _survivors_tail = NULL; + _survivor_length = 0; + _g1h->g1_policy()->finished_recalculating_age_indexes(); + + assert(check_list_well_formed(), "young list should be well formed"); +} + +void YoungList::print() { + HeapRegion* lists[] = {_head, _scan_only_head, _survivor_head}; + const char* names[] = {"YOUNG", "SCAN-ONLY", "SURVIVOR"}; + + for (unsigned int list = 0; list < ARRAY_SIZE(lists); ++list) { + gclog_or_tty->print_cr("%s LIST CONTENTS", names[list]); + HeapRegion *curr = lists[list]; + if (curr == NULL) + gclog_or_tty->print_cr(" empty"); + while (curr != NULL) { + gclog_or_tty->print_cr(" [%08x-%08x], t: %08x, P: %08x, N: %08x, C: %08x, " + "age: %4d, y: %d, s-o: %d, surv: %d", + curr->bottom(), curr->end(), + curr->top(), + curr->prev_top_at_mark_start(), + curr->next_top_at_mark_start(), + curr->top_at_conc_mark_count(), + curr->age_in_surv_rate_group_cond(), + curr->is_young(), + curr->is_scan_only(), + curr->is_survivor()); + curr = curr->get_next_young_region(); + } + } + + gclog_or_tty->print_cr(""); +} + +void G1CollectedHeap::stop_conc_gc_threads() { + _cg1r->cg1rThread()->stop(); + _czft->stop(); + _cmThread->stop(); +} + + +void G1CollectedHeap::check_ct_logs_at_safepoint() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set(); + + // Count the dirty cards at the start. + CountNonCleanMemRegionClosure count1(this); + ct_bs->mod_card_iterate(&count1); + int orig_count = count1.n(); + + // First clear the logged cards. + ClearLoggedCardTableEntryClosure clear; + dcqs.set_closure(&clear); + dcqs.apply_closure_to_all_completed_buffers(); + dcqs.iterate_closure_all_threads(false); + clear.print_histo(); + + // Now ensure that there's no dirty cards. 
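// A simplified, standalone sketch of the invariant the checks below rely on:
// clearing every logged card and then re-dirtying exactly the same set takes
// the same number of operations and leaves the table no cleaner than before.
// The clean/dirty byte values here are made up, not the real card-table
// constants.
#include <cassert>
#include <vector>

void clear_then_redirty(std::vector<signed char>& cards,
                        signed char clean_val, signed char dirty_val) {
  std::vector<size_t> logged;
  for (size_t i = 0; i < cards.size(); ++i) {
    if (cards[i] == dirty_val) {
      logged.push_back(i);           // remember it, as the completed buffers do
      cards[i] = clean_val;          // "clear" pass
    }
  }
  size_t redirtied = 0;
  for (size_t k = 0; k < logged.size(); ++k) {
    cards[logged[k]] = dirty_val;    // "redirty" pass over the same entries
    ++redirtied;
  }
  assert(redirtied == logged.size()); // mirrors redirty.calls() == clear.calls()
}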
+ CountNonCleanMemRegionClosure count2(this); + ct_bs->mod_card_iterate(&count2); + if (count2.n() != 0) { + gclog_or_tty->print_cr("Card table has %d entries; %d originally", + count2.n(), orig_count); + } + guarantee(count2.n() == 0, "Card table should be clean."); + + RedirtyLoggedCardTableEntryClosure redirty; + JavaThread::dirty_card_queue_set().set_closure(&redirty); + dcqs.apply_closure_to_all_completed_buffers(); + dcqs.iterate_closure_all_threads(false); + gclog_or_tty->print_cr("Log entries = %d, dirty cards = %d.", + clear.calls(), orig_count); + guarantee(redirty.calls() == clear.calls(), + "Or else mechanism is broken."); + + CountNonCleanMemRegionClosure count3(this); + ct_bs->mod_card_iterate(&count3); + if (count3.n() != orig_count) { + gclog_or_tty->print_cr("Should have restored them all: orig = %d, final = %d.", + orig_count, count3.n()); + guarantee(count3.n() >= orig_count, "Should have restored them all."); + } + + JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl); +} + +// Private class members. + +G1CollectedHeap* G1CollectedHeap::_g1h; + +// Private methods. + +// Finds a HeapRegion that can be used to allocate a given size of block. + + +HeapRegion* G1CollectedHeap::newAllocRegion_work(size_t word_size, + bool do_expand, + bool zero_filled) { + ConcurrentZFThread::note_region_alloc(); + HeapRegion* res = alloc_free_region_from_lists(zero_filled); + if (res == NULL && do_expand) { + expand(word_size * HeapWordSize); + res = alloc_free_region_from_lists(zero_filled); + assert(res == NULL || + (!res->isHumongous() && + (!zero_filled || + res->zero_fill_state() == HeapRegion::Allocated)), + "Alloc Regions must be zero filled (and non-H)"); + } + if (res != NULL && res->is_empty()) _free_regions--; + assert(res == NULL || + (!res->isHumongous() && + (!zero_filled || + res->zero_fill_state() == HeapRegion::Allocated)), + "Non-young alloc Regions must be zero filled (and non-H)"); + + if (G1TraceRegions) { + if (res != NULL) { + gclog_or_tty->print_cr("new alloc region %d:["PTR_FORMAT", "PTR_FORMAT"], " + "top "PTR_FORMAT, + res->hrs_index(), res->bottom(), res->end(), res->top()); + } + } + + return res; +} + +HeapRegion* G1CollectedHeap::newAllocRegionWithExpansion(int purpose, + size_t word_size, + bool zero_filled) { + HeapRegion* alloc_region = NULL; + if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) { + alloc_region = newAllocRegion_work(word_size, true, zero_filled); + if (purpose == GCAllocForSurvived && alloc_region != NULL) { + _young_list->add_survivor_region(alloc_region); + } + ++_gc_alloc_region_counts[purpose]; + } else { + g1_policy()->note_alloc_region_limit_reached(purpose); + } + return alloc_region; +} + +// If could fit into free regions w/o expansion, try. +// Otherwise, if can expand, do so. +// Otherwise, if using ex regions might help, try with ex given back. +HeapWord* G1CollectedHeap::humongousObjAllocate(size_t word_size) { + assert(regions_accounted_for(), "Region leakage!"); + + // We can't allocate H regions while cleanupComplete is running, since + // some of the regions we find to be empty might not yet be added to the + // unclean list. (If we're already at a safepoint, this call is + // unnecessary, not to mention wrong.) + if (!SafepointSynchronize::is_at_safepoint()) + wait_for_cleanup_complete(); + + size_t num_regions = + round_to(word_size, HeapRegion::GrainWords) / HeapRegion::GrainWords; + + // Special case if < one region??? + + // Remember the ft size. 
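// A simplified, standalone sketch of the region count computed above:
// round_to(word_size, GrainWords) / GrainWords is a ceiling division, i.e.
// the number of whole regions needed to hold word_size words.  grain_words
// here is a hypothetical region size in words.
#include <cstddef>

size_t regions_needed(size_t word_size, size_t grain_words) {
  return (word_size + grain_words - 1) / grain_words;  // ceil(word_size / grain_words)
}
// With grain_words == 1024: regions_needed(1, 1024) == 1,
// regions_needed(1024, 1024) == 1 and regions_needed(1025, 1024) == 2.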
+ size_t x_size = expansion_regions(); + + HeapWord* res = NULL; + bool eliminated_allocated_from_lists = false; + + // Can the allocation potentially fit in the free regions? + if (free_regions() >= num_regions) { + res = _hrs->obj_allocate(word_size); + } + if (res == NULL) { + // Try expansion. + size_t fs = _hrs->free_suffix(); + if (fs + x_size >= num_regions) { + expand((num_regions - fs) * HeapRegion::GrainBytes); + res = _hrs->obj_allocate(word_size); + assert(res != NULL, "This should have worked."); + } else { + // Expansion won't help. Are there enough free regions if we get rid + // of reservations? + size_t avail = free_regions(); + if (avail >= num_regions) { + res = _hrs->obj_allocate(word_size); + if (res != NULL) { + remove_allocated_regions_from_lists(); + eliminated_allocated_from_lists = true; + } + } + } + } + if (res != NULL) { + // Increment by the number of regions allocated. + // FIXME: Assumes regions all of size GrainBytes. +#ifndef PRODUCT + mr_bs()->verify_clean_region(MemRegion(res, res + num_regions * + HeapRegion::GrainWords)); +#endif + if (!eliminated_allocated_from_lists) + remove_allocated_regions_from_lists(); + _summary_bytes_used += word_size * HeapWordSize; + _free_regions -= num_regions; + _num_humongous_regions += (int) num_regions; + } + assert(regions_accounted_for(), "Region Leakage"); + return res; +} + +HeapWord* +G1CollectedHeap::attempt_allocation_slow(size_t word_size, + bool permit_collection_pause) { + HeapWord* res = NULL; + HeapRegion* allocated_young_region = NULL; + + assert( SafepointSynchronize::is_at_safepoint() || + Heap_lock->owned_by_self(), "pre condition of the call" ); + + if (isHumongous(word_size)) { + // Allocation of a humongous object can, in a sense, complete a + // partial region, if the previous alloc was also humongous, and + // caused the test below to succeed. + if (permit_collection_pause) + do_collection_pause_if_appropriate(word_size); + res = humongousObjAllocate(word_size); + assert(_cur_alloc_region == NULL + || !_cur_alloc_region->isHumongous(), + "Prevent a regression of this bug."); + + } else { + // If we do a collection pause, this will be reset to a non-NULL + // value. If we don't, nulling here ensures that we allocate a new + // region below. + if (_cur_alloc_region != NULL) { + // We're finished with the _cur_alloc_region. + _summary_bytes_used += _cur_alloc_region->used(); + _cur_alloc_region = NULL; + } + assert(_cur_alloc_region == NULL, "Invariant."); + // Completion of a heap region is perhaps a good point at which to do + // a collection pause. + if (permit_collection_pause) + do_collection_pause_if_appropriate(word_size); + // Make sure we have an allocation region available. + if (_cur_alloc_region == NULL) { + if (!SafepointSynchronize::is_at_safepoint()) + wait_for_cleanup_complete(); + bool next_is_young = should_set_young_locked(); + // If the next region is not young, make sure it's zero-filled. + _cur_alloc_region = newAllocRegion(word_size, !next_is_young); + if (_cur_alloc_region != NULL) { + _summary_bytes_used -= _cur_alloc_region->used(); + if (next_is_young) { + set_region_short_lived_locked(_cur_alloc_region); + allocated_young_region = _cur_alloc_region; + } + } + } + assert(_cur_alloc_region == NULL || !_cur_alloc_region->isHumongous(), + "Prevent a regression of this bug."); + + // Now retry the allocation. 
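// A simplified, standalone sketch of the retry performed below for a
// non-humongous request: a bump-pointer allocation inside the (possibly
// freshly installed) current allocation region.  The Region struct and the
// byte-based interface are hypothetical; the real code works in HeapWords
// while holding the Heap_lock or at a safepoint.
#include <cstddef>

struct Region {
  char* top;   // next free byte in the region
  char* end;   // one past the last usable byte
};

// Returns the start of the newly allocated chunk, or NULL if it does not fit.
char* bump_allocate(Region* r, size_t bytes) {
  if (r == NULL || (size_t)(r->end - r->top) < bytes) {
    return NULL;
  }
  char* result = r->top;
  r->top += bytes;
  return result;
}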
+ if (_cur_alloc_region != NULL) { + res = _cur_alloc_region->allocate(word_size); + } + } + + // NOTE: fails frequently in PRT + assert(regions_accounted_for(), "Region leakage!"); + + if (res != NULL) { + if (!SafepointSynchronize::is_at_safepoint()) { + assert( permit_collection_pause, "invariant" ); + assert( Heap_lock->owned_by_self(), "invariant" ); + Heap_lock->unlock(); + } + + if (allocated_young_region != NULL) { + HeapRegion* hr = allocated_young_region; + HeapWord* bottom = hr->bottom(); + HeapWord* end = hr->end(); + MemRegion mr(bottom, end); + ((CardTableModRefBS*)_g1h->barrier_set())->dirty(mr); + } + } + + assert( SafepointSynchronize::is_at_safepoint() || + (res == NULL && Heap_lock->owned_by_self()) || + (res != NULL && !Heap_lock->owned_by_self()), + "post condition of the call" ); + + return res; +} + +HeapWord* +G1CollectedHeap::mem_allocate(size_t word_size, + bool is_noref, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded) { + debug_only(check_for_valid_allocation_state()); + assert(no_gc_in_progress(), "Allocation during gc not allowed"); + HeapWord* result = NULL; + + // Loop until the allocation is satisified, + // or unsatisfied after GC. + for (int try_count = 1; /* return or throw */; try_count += 1) { + int gc_count_before; + { + Heap_lock->lock(); + result = attempt_allocation(word_size); + if (result != NULL) { + // attempt_allocation should have unlocked the heap lock + assert(is_in(result), "result not in heap"); + return result; + } + // Read the gc count while the heap lock is held. + gc_count_before = SharedHeap::heap()->total_collections(); + Heap_lock->unlock(); + } + + // Create the garbage collection operation... + VM_G1CollectForAllocation op(word_size, + gc_count_before); + + // ...and get the VM thread to execute it. + VMThread::execute(&op); + if (op.prologue_succeeded()) { + result = op.result(); + assert(result == NULL || is_in(result), "result not in heap"); + return result; + } + + // Give a warning if we seem to be looping forever. + if ((QueuedAllocationWarningCount > 0) && + (try_count % QueuedAllocationWarningCount == 0)) { + warning("G1CollectedHeap::mem_allocate_work retries %d times", + try_count); + } + } +} + +void G1CollectedHeap::abandon_cur_alloc_region() { + if (_cur_alloc_region != NULL) { + // We're finished with the _cur_alloc_region. + if (_cur_alloc_region->is_empty()) { + _free_regions++; + free_region(_cur_alloc_region); + } else { + _summary_bytes_used += _cur_alloc_region->used(); + } + _cur_alloc_region = NULL; + } +} + +class PostMCRemSetClearClosure: public HeapRegionClosure { + ModRefBarrierSet* _mr_bs; +public: + PostMCRemSetClearClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {} + bool doHeapRegion(HeapRegion* r) { + r->reset_gc_time_stamp(); + if (r->continuesHumongous()) + return false; + HeapRegionRemSet* hrrs = r->rem_set(); + if (hrrs != NULL) hrrs->clear(); + // You might think here that we could clear just the cards + // corresponding to the used region. But no: if we leave a dirty card + // in a region we might allocate into, then it would prevent that card + // from being enqueued, and cause it to be missed. + // Re: the performance cost: we shouldn't be doing full GC anyway! 
+ _mr_bs->clear(MemRegion(r->bottom(), r->end())); + return false; + } +}; + + +class PostMCRemSetInvalidateClosure: public HeapRegionClosure { + ModRefBarrierSet* _mr_bs; +public: + PostMCRemSetInvalidateClosure(ModRefBarrierSet* mr_bs) : _mr_bs(mr_bs) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->used_region().word_size() != 0) { + _mr_bs->invalidate(r->used_region(), true /*whole heap*/); + } + return false; + } +}; + +void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, + size_t word_size) { + ResourceMark rm; + + if (full && DisableExplicitGC) { + gclog_or_tty->print("\n\n\nDisabling Explicit GC\n\n\n"); + return; + } + + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread"); + + if (GC_locker::is_active()) { + return; // GC is disabled (e.g. JNI GetXXXCritical operation) + } + + { + IsGCActiveMark x; + + // Timing + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(full ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty); + + double start = os::elapsedTime(); + GCOverheadReporter::recordSTWStart(start); + g1_policy()->record_full_collection_start(); + + gc_prologue(true); + increment_total_collections(); + + size_t g1h_prev_used = used(); + assert(used() == recalculate_used(), "Should be equal"); + + if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + prepare_for_verify(); + gclog_or_tty->print(" VerifyBeforeGC:"); + Universe::verify(true); + } + assert(regions_accounted_for(), "Region leakage!"); + + COMPILER2_PRESENT(DerivedPointerTable::clear()); + + // We want to discover references, but not process them yet. + // This mode is disabled in + // instanceRefKlass::process_discovered_references if the + // generation does some collection work, or + // instanceRefKlass::enqueue_discovered_references if the + // generation returns without doing any work. + ref_processor()->disable_discovery(); + ref_processor()->abandon_partial_discovery(); + ref_processor()->verify_no_references_recorded(); + + // Abandon current iterations of concurrent marking and concurrent + // refinement, if any are in progress. + concurrent_mark()->abort(); + + // Make sure we'll choose a new allocation region afterwards. + abandon_cur_alloc_region(); + assert(_cur_alloc_region == NULL, "Invariant."); + g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS(); + tear_down_region_lists(); + set_used_regions_to_need_zero_fill(); + if (g1_policy()->in_young_gc_mode()) { + empty_young_list(); + g1_policy()->set_full_young_gcs(true); + } + + // Temporarily make reference _discovery_ single threaded (non-MT). + ReferenceProcessorMTMutator rp_disc_ser(ref_processor(), false); + + // Temporarily make refs discovery atomic + ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true); + + // Temporarily clear _is_alive_non_header + ReferenceProcessorIsAliveMutator rp_is_alive_null(ref_processor(), NULL); + + ref_processor()->enable_discovery(); + + // Do collection work + { + HandleMark hm; // Discard invalid handles created during gc + G1MarkSweep::invoke_at_safepoint(ref_processor(), clear_all_soft_refs); + } + // Because freeing humongous regions may have added some unclean + // regions, it is necessary to tear down again before rebuilding. 
+ tear_down_region_lists(); + rebuild_region_lists(); + + _summary_bytes_used = recalculate_used(); + + ref_processor()->enqueue_discovered_references(); + + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + + if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + gclog_or_tty->print(" VerifyAfterGC:"); + Universe::verify(false); + } + NOT_PRODUCT(ref_processor()->verify_no_references_recorded()); + + reset_gc_time_stamp(); + // Since everything potentially moved, we will clear all remembered + // sets, and clear all cards. Later we will also cards in the used + // portion of the heap after the resizing (which could be a shrinking.) + // We will also reset the GC time stamps of the regions. + PostMCRemSetClearClosure rs_clear(mr_bs()); + heap_region_iterate(&rs_clear); + + // Resize the heap if necessary. + resize_if_necessary_after_full_collection(full ? 0 : word_size); + + // Since everything potentially moved, we will clear all remembered + // sets, but also dirty all cards corresponding to used regions. + PostMCRemSetInvalidateClosure rs_invalidate(mr_bs()); + heap_region_iterate(&rs_invalidate); + if (_cg1r->use_cache()) { + _cg1r->clear_and_record_card_counts(); + _cg1r->clear_hot_cache(); + } + + if (PrintGC) { + print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity()); + } + + if (true) { // FIXME + // Ask the permanent generation to adjust size for full collections + perm()->compute_new_size(); + } + + double end = os::elapsedTime(); + GCOverheadReporter::recordSTWEnd(end); + g1_policy()->record_full_collection_end(); + + gc_epilogue(true); + + // Abandon concurrent refinement. This must happen last: in the + // dirty-card logging system, some cards may be dirty by weak-ref + // processing, and may be enqueued. But the whole card table is + // dirtied, so this should abandon those logs, and set "do_traversal" + // to true. + concurrent_g1_refine()->set_pya_restart(); + + assert(regions_accounted_for(), "Region leakage!"); + } + + if (g1_policy()->in_young_gc_mode()) { + _young_list->reset_sampled_info(); + assert( check_young_list_empty(false, false), + "young list should be empty at this point"); + } +} + +void G1CollectedHeap::do_full_collection(bool clear_all_soft_refs) { + do_collection(true, clear_all_soft_refs, 0); +} + +// This code is mostly copied from TenuredGeneration. +void +G1CollectedHeap:: +resize_if_necessary_after_full_collection(size_t word_size) { + assert(MinHeapFreeRatio <= MaxHeapFreeRatio, "sanity check"); + + // Include the current allocation, if any, and bytes that will be + // pre-allocated to support collections, as "used". + const size_t used_after_gc = used(); + const size_t capacity_after_gc = capacity(); + const size_t free_after_gc = capacity_after_gc - used_after_gc; + + // We don't have floating point command-line arguments + const double minimum_free_percentage = (double) MinHeapFreeRatio / 100; + const double maximum_used_percentage = 1.0 - minimum_free_percentage; + const double maximum_free_percentage = (double) MaxHeapFreeRatio / 100; + const double minimum_used_percentage = 1.0 - maximum_free_percentage; + + size_t minimum_desired_capacity = (size_t) (used_after_gc / maximum_used_percentage); + size_t maximum_desired_capacity = (size_t) (used_after_gc / minimum_used_percentage); + + // Don't shrink less than the initial size. 
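// A worked, standalone sketch of the ratio arithmetic above, with made-up
// numbers: if 600 MB are in use after the full GC and MinHeapFreeRatio is 40,
// the heap must be at least 600 / (1 - 0.40) = 1000 MB so that 40% of it is
// free; with MaxHeapFreeRatio 70 it should be at most 600 / (1 - 0.70) =
// 2000 MB so that no more than 70% is free.  The clamping against the
// initial heap size happens just below.
#include <cstddef>

size_t min_desired_capacity(size_t used_after_gc, unsigned min_heap_free_ratio) {
  double maximum_used_percentage = 1.0 - (double)min_heap_free_ratio / 100.0;
  return (size_t)((double)used_after_gc / maximum_used_percentage);
}
// e.g. min_desired_capacity(600u << 20, 40) is roughly 1000 MB.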
+ minimum_desired_capacity = + MAX2(minimum_desired_capacity, + collector_policy()->initial_heap_byte_size()); + maximum_desired_capacity = + MAX2(maximum_desired_capacity, + collector_policy()->initial_heap_byte_size()); + + // We are failing here because minimum_desired_capacity is + assert(used_after_gc <= minimum_desired_capacity, "sanity check"); + assert(minimum_desired_capacity <= maximum_desired_capacity, "sanity check"); + + if (PrintGC && Verbose) { + const double free_percentage = ((double)free_after_gc) / capacity(); + gclog_or_tty->print_cr("Computing new size after full GC "); + gclog_or_tty->print_cr(" " + " minimum_free_percentage: %6.2f", + minimum_free_percentage); + gclog_or_tty->print_cr(" " + " maximum_free_percentage: %6.2f", + maximum_free_percentage); + gclog_or_tty->print_cr(" " + " capacity: %6.1fK" + " minimum_desired_capacity: %6.1fK" + " maximum_desired_capacity: %6.1fK", + capacity() / (double) K, + minimum_desired_capacity / (double) K, + maximum_desired_capacity / (double) K); + gclog_or_tty->print_cr(" " + " free_after_gc : %6.1fK" + " used_after_gc : %6.1fK", + free_after_gc / (double) K, + used_after_gc / (double) K); + gclog_or_tty->print_cr(" " + " free_percentage: %6.2f", + free_percentage); + } + if (capacity() < minimum_desired_capacity) { + // Don't expand unless it's significant + size_t expand_bytes = minimum_desired_capacity - capacity_after_gc; + expand(expand_bytes); + if (PrintGC && Verbose) { + gclog_or_tty->print_cr(" expanding:" + " minimum_desired_capacity: %6.1fK" + " expand_bytes: %6.1fK", + minimum_desired_capacity / (double) K, + expand_bytes / (double) K); + } + + // No expansion, now see if we want to shrink + } else if (capacity() > maximum_desired_capacity) { + // Capacity too large, compute shrinking size + size_t shrink_bytes = capacity_after_gc - maximum_desired_capacity; + shrink(shrink_bytes); + if (PrintGC && Verbose) { + gclog_or_tty->print_cr(" " + " shrinking:" + " initSize: %.1fK" + " maximum_desired_capacity: %.1fK", + collector_policy()->initial_heap_byte_size() / (double) K, + maximum_desired_capacity / (double) K); + gclog_or_tty->print_cr(" " + " shrink_bytes: %.1fK", + shrink_bytes / (double) K); + } + } +} + + +HeapWord* +G1CollectedHeap::satisfy_failed_allocation(size_t word_size) { + HeapWord* result = NULL; + + // In a G1 heap, we're supposed to keep allocation from failing by + // incremental pauses. Therefore, at least for now, we'll favor + // expansion over collection. (This might change in the future if we can + // do something smarter than full collection to satisfy a failed alloc.) + + result = expand_and_allocate(word_size); + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // OK, I guess we have to try collection. + + do_collection(false, false, word_size); + + result = attempt_allocation(word_size, /*permit_collection_pause*/false); + + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // Try collecting soft references. + do_collection(false, true, word_size); + result = attempt_allocation(word_size, /*permit_collection_pause*/false); + if (result != NULL) { + assert(is_in(result), "result not in heap"); + return result; + } + + // What else? We might try synchronous finalization later. If the total + // space available is large enough for the allocation, then a more + // complete compaction phase than we've tried so far might be + // appropriate. 
+ return NULL; +} + +// Attempting to expand the heap sufficiently +// to support an allocation of the given "word_size". If +// successful, perform the allocation and return the address of the +// allocated block, or else "NULL". + +HeapWord* G1CollectedHeap::expand_and_allocate(size_t word_size) { + size_t expand_bytes = word_size * HeapWordSize; + if (expand_bytes < MinHeapDeltaBytes) { + expand_bytes = MinHeapDeltaBytes; + } + expand(expand_bytes); + assert(regions_accounted_for(), "Region leakage!"); + HeapWord* result = attempt_allocation(word_size, false /* permit_collection_pause */); + return result; +} + +size_t G1CollectedHeap::free_region_if_totally_empty(HeapRegion* hr) { + size_t pre_used = 0; + size_t cleared_h_regions = 0; + size_t freed_regions = 0; + UncleanRegionList local_list; + free_region_if_totally_empty_work(hr, pre_used, cleared_h_regions, + freed_regions, &local_list); + + finish_free_region_work(pre_used, cleared_h_regions, freed_regions, + &local_list); + return pre_used; +} + +void +G1CollectedHeap::free_region_if_totally_empty_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h, + size_t& freed_regions, + UncleanRegionList* list, + bool par) { + assert(!hr->continuesHumongous(), "should have filtered these out"); + size_t res = 0; + if (!hr->popular() && hr->used() > 0 && hr->garbage_bytes() == hr->used()) { + if (!hr->is_young()) { + if (G1PolicyVerbose > 0) + gclog_or_tty->print_cr("Freeing empty region "PTR_FORMAT "(" SIZE_FORMAT " bytes)" + " during cleanup", hr, hr->used()); + free_region_work(hr, pre_used, cleared_h, freed_regions, list, par); + } + } +} + +// FIXME: both this and shrink could probably be more efficient by +// doing one "VirtualSpace::expand_by" call rather than several. +void G1CollectedHeap::expand(size_t expand_bytes) { + size_t old_mem_size = _g1_storage.committed_size(); + // We expand by a minimum of 1K. + expand_bytes = MAX2(expand_bytes, (size_t)K); + size_t aligned_expand_bytes = + ReservedSpace::page_align_size_up(expand_bytes); + aligned_expand_bytes = align_size_up(aligned_expand_bytes, + HeapRegion::GrainBytes); + expand_bytes = aligned_expand_bytes; + while (expand_bytes > 0) { + HeapWord* base = (HeapWord*)_g1_storage.high(); + // Commit more storage. + bool successful = _g1_storage.expand_by(HeapRegion::GrainBytes); + if (!successful) { + expand_bytes = 0; + } else { + expand_bytes -= HeapRegion::GrainBytes; + // Expand the committed region. + HeapWord* high = (HeapWord*) _g1_storage.high(); + _g1_committed.set_end(high); + // Create a new HeapRegion. + MemRegion mr(base, high); + bool is_zeroed = !_g1_max_committed.contains(base); + HeapRegion* hr = new HeapRegion(_bot_shared, mr, is_zeroed); + + // Now update max_committed if necessary. + _g1_max_committed.set_end(MAX2(_g1_max_committed.end(), high)); + + // Add it to the HeapRegionSeq. + _hrs->insert(hr); + // Set the zero-fill state, according to whether it's already + // zeroed. + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + if (is_zeroed) { + hr->set_zero_fill_complete(); + put_free_region_on_list_locked(hr); + } else { + hr->set_zero_fill_needed(); + put_region_on_unclean_list_locked(hr); + } + } + _free_regions++; + // And we used up an expansion region to create it. + _expansion_regions--; + // Tell the cardtable about it. + Universe::heap()->barrier_set()->resize_covered_region(_g1_committed); + // And the offset table as well. 
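// A simplified, standalone sketch of the align_size_up() used at the top of
// expand(): rounding a byte count up to the next multiple of a power-of-two
// alignment with mask arithmetic.
#include <cassert>
#include <cstddef>

size_t align_up(size_t bytes, size_t alignment) {  // alignment: a power of two
  assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
  return (bytes + alignment - 1) & ~(alignment - 1);
}
// e.g. align_up(1, 4096) == 4096, align_up(4096, 4096) == 4096 and
// align_up(4097, 4096) == 8192.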
+ _bot_shared->resize(_g1_committed.word_size()); + } + } + if (Verbose && PrintGC) { + size_t new_mem_size = _g1_storage.committed_size(); + gclog_or_tty->print_cr("Expanding garbage-first heap from %ldK by %ldK to %ldK", + old_mem_size/K, aligned_expand_bytes/K, + new_mem_size/K); + } +} + +void G1CollectedHeap::shrink_helper(size_t shrink_bytes) +{ + size_t old_mem_size = _g1_storage.committed_size(); + size_t aligned_shrink_bytes = + ReservedSpace::page_align_size_down(shrink_bytes); + aligned_shrink_bytes = align_size_down(aligned_shrink_bytes, + HeapRegion::GrainBytes); + size_t num_regions_deleted = 0; + MemRegion mr = _hrs->shrink_by(aligned_shrink_bytes, num_regions_deleted); + + assert(mr.end() == (HeapWord*)_g1_storage.high(), "Bad shrink!"); + if (mr.byte_size() > 0) + _g1_storage.shrink_by(mr.byte_size()); + assert(mr.start() == (HeapWord*)_g1_storage.high(), "Bad shrink!"); + + _g1_committed.set_end(mr.start()); + _free_regions -= num_regions_deleted; + _expansion_regions += num_regions_deleted; + + // Tell the cardtable about it. + Universe::heap()->barrier_set()->resize_covered_region(_g1_committed); + + // And the offset table as well. + _bot_shared->resize(_g1_committed.word_size()); + + HeapRegionRemSet::shrink_heap(n_regions()); + + if (Verbose && PrintGC) { + size_t new_mem_size = _g1_storage.committed_size(); + gclog_or_tty->print_cr("Shrinking garbage-first heap from %ldK by %ldK to %ldK", + old_mem_size/K, aligned_shrink_bytes/K, + new_mem_size/K); + } +} + +void G1CollectedHeap::shrink(size_t shrink_bytes) { + release_gc_alloc_regions(); + tear_down_region_lists(); // We will rebuild them in a moment. + shrink_helper(shrink_bytes); + rebuild_region_lists(); +} + +// Public methods. + +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + +G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : + SharedHeap(policy_), + _g1_policy(policy_), + _ref_processor(NULL), + _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), + _bot_shared(NULL), + _par_alloc_during_gc_lock(Mutex::leaf, "par alloc during GC lock"), + _objs_with_preserved_marks(NULL), _preserved_marks_of_objs(NULL), + _evac_failure_scan_stack(NULL) , + _mark_in_progress(false), + _cg1r(NULL), _czft(NULL), _summary_bytes_used(0), + _cur_alloc_region(NULL), + _refine_cte_cl(NULL), + _free_region_list(NULL), _free_region_list_size(0), + _free_regions(0), + _popular_object_boundary(NULL), + _cur_pop_hr_index(0), + _popular_regions_to_be_evacuated(NULL), + _pop_obj_rc_at_copy(), + _full_collection(false), + _unclean_region_list(), + _unclean_regions_coming(false), + _young_list(new YoungList(this)), + _gc_time_stamp(0), + _surviving_young_words(NULL) +{ + _g1h = this; // To catch bugs. 
+ if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { + vm_exit_during_initialization("Failed necessary allocation."); + } + int n_queues = MAX2((int)ParallelGCThreads, 1); + _task_queues = new RefToScanQueueSet(n_queues); + + int n_rem_sets = HeapRegionRemSet::num_par_rem_sets(); + assert(n_rem_sets > 0, "Invariant."); + + HeapRegionRemSetIterator** iter_arr = + NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues); + for (int i = 0; i < n_queues; i++) { + iter_arr[i] = new HeapRegionRemSetIterator(); + } + _rem_set_iterator = iter_arr; + + for (int i = 0; i < n_queues; i++) { + RefToScanQueue* q = new RefToScanQueue(); + q->initialize(); + _task_queues->register_queue(i, q); + } + + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + _gc_alloc_regions[ap] = NULL; + _gc_alloc_region_counts[ap] = 0; + } + guarantee(_task_queues != NULL, "task_queues allocation failure."); +} + +jint G1CollectedHeap::initialize() { + os::enable_vtime(); + + // Necessary to satisfy locking discipline assertions. + + MutexLocker x(Heap_lock); + + // While there are no constraints in the GC code that HeapWordSize + // be any particular value, there are multiple other areas in the + // system which believe this to be true (e.g. oop->object_size in some + // cases incorrectly returns the size in wordSize units rather than + // HeapWordSize). + guarantee(HeapWordSize == wordSize, "HeapWordSize must equal wordSize"); + + size_t init_byte_size = collector_policy()->initial_heap_byte_size(); + size_t max_byte_size = collector_policy()->max_heap_byte_size(); + + // Ensure that the sizes are properly aligned. + Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap"); + Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); + + // We allocate this in any case, but only do no work if the command line + // param is off. + _cg1r = new ConcurrentG1Refine(); + + // Reserve the maximum. + PermanentGenerationSpec* pgs = collector_policy()->permanent_generation(); + // Includes the perm-gen. + ReservedSpace heap_rs(max_byte_size + pgs->max_size(), + HeapRegion::GrainBytes, + false /*ism*/); + + if (!heap_rs.is_reserved()) { + vm_exit_during_initialization("Could not reserve enough space for object heap"); + return JNI_ENOMEM; + } + + // It is important to do this in a way such that concurrent readers can't + // temporarily think somethings in the heap. (I've actually seen this + // happen in asserts: DLD.) + _reserved.set_word_size(0); + _reserved.set_start((HeapWord*)heap_rs.base()); + _reserved.set_end((HeapWord*)(heap_rs.base() + heap_rs.size())); + + _expansion_regions = max_byte_size/HeapRegion::GrainBytes; + + _num_humongous_regions = 0; + + // Create the gen rem set (and barrier set) for the entire reserved region. + _rem_set = collector_policy()->create_rem_set(_reserved, 2); + set_barrier_set(rem_set()->bs()); + if (barrier_set()->is_a(BarrierSet::ModRef)) { + _mr_bs = (ModRefBarrierSet*)_barrier_set; + } else { + vm_exit_during_initialization("G1 requires a mod ref bs."); + return JNI_ENOMEM; + } + + // Also create a G1 rem set. + if (G1UseHRIntoRS) { + if (mr_bs()->is_a(BarrierSet::CardTableModRef)) { + _g1_rem_set = new HRInto_G1RemSet(this, (CardTableModRefBS*)mr_bs()); + } else { + vm_exit_during_initialization("G1 requires a cardtable mod ref bs."); + return JNI_ENOMEM; + } + } else { + _g1_rem_set = new StupidG1RemSet(this); + } + + // Carve out the G1 part of the heap. 
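// A simplified, standalone sketch of the split performed next: one contiguous
// reservation of (max_byte_size + perm gen size) bytes is divided into a
// leading part for the G1 heap and a trailing part for the permanent
// generation.  Range is a hypothetical stand-in for ReservedSpace.
#include <cstddef>

struct Range { char* base; size_t size; };

Range first_part(const Range& r, size_t bytes) {   // leading [base, base + bytes)
  Range res = { r.base, bytes };
  return res;
}

Range last_part(const Range& r, size_t offset) {   // trailing [base + offset, base + size)
  Range res = { r.base + offset, r.size - offset };
  return res;
}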
+ + ReservedSpace g1_rs = heap_rs.first_part(max_byte_size); + _g1_reserved = MemRegion((HeapWord*)g1_rs.base(), + g1_rs.size()/HeapWordSize); + ReservedSpace perm_gen_rs = heap_rs.last_part(max_byte_size); + + _perm_gen = pgs->init(perm_gen_rs, pgs->init_size(), rem_set()); + + _g1_storage.initialize(g1_rs, 0); + _g1_committed = MemRegion((HeapWord*)_g1_storage.low(), (size_t) 0); + _g1_max_committed = _g1_committed; + _hrs = new HeapRegionSeq(); + guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq"); + guarantee(_cur_alloc_region == NULL, "from constructor"); + + _bot_shared = new G1BlockOffsetSharedArray(_reserved, + heap_word_size(init_byte_size)); + + _g1h = this; + + // Create the ConcurrentMark data structure and thread. + // (Must do this late, so that "max_regions" is defined.) + _cm = new ConcurrentMark(heap_rs, (int) max_regions()); + _cmThread = _cm->cmThread(); + + // ...and the concurrent zero-fill thread, if necessary. + if (G1ConcZeroFill) { + _czft = new ConcurrentZFThread(); + } + + + + // Allocate the popular regions; take them off free lists. + size_t pop_byte_size = G1NumPopularRegions * HeapRegion::GrainBytes; + expand(pop_byte_size); + _popular_object_boundary = + _g1_reserved.start() + (G1NumPopularRegions * HeapRegion::GrainWords); + for (int i = 0; i < G1NumPopularRegions; i++) { + HeapRegion* hr = newAllocRegion(HeapRegion::GrainWords); + // assert(hr != NULL && hr->bottom() < _popular_object_boundary, + // "Should be enough, and all should be below boundary."); + hr->set_popular(true); + } + assert(_cur_pop_hr_index == 0, "Start allocating at the first region."); + + // Initialize the from_card cache structure of HeapRegionRemSet. + HeapRegionRemSet::init_heap(max_regions()); + + // Now expand into the rest of the initial heap size. + expand(init_byte_size - pop_byte_size); + + // Perform any initialization actions delegated to the policy. + g1_policy()->init(); + + g1_policy()->note_start_of_mark_thread(); + + _refine_cte_cl = + new RefineCardTableEntryClosure(ConcurrentG1RefineThread::sts(), + g1_rem_set(), + concurrent_g1_refine()); + JavaThread::dirty_card_queue_set().set_closure(_refine_cte_cl); + + JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon, + SATB_Q_FL_lock, + 0, + Shared_SATB_Q_lock); + if (G1RSBarrierUseQueue) { + JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + G1DirtyCardQueueMax, + Shared_DirtyCardQ_lock); + } + // In case we're keeping closure specialization stats, initialize those + // counts and that mechanism. + SpecializationStats::clear(); + + _gc_alloc_region_list = NULL; + + // Do later initialization work for concurrent refinement. + _cg1r->init(); + + const char* group_names[] = { "CR", "ZF", "CM", "CL" }; + GCOverheadReporter::initGCOverheadReporter(4, group_names); + + return JNI_OK; +} + +void G1CollectedHeap::ref_processing_init() { + SharedHeap::ref_processing_init(); + MemRegion mr = reserved_region(); + _ref_processor = ReferenceProcessor::create_ref_processor( + mr, // span + false, // Reference discovery is not atomic + // (though it shouldn't matter here.) + true, // mt_discovery + NULL, // is alive closure: need to fill this in for efficiency + ParallelGCThreads, + ParallelRefProcEnabled, + true); // Setting next fields of discovered + // lists requires a barrier. 
+} + +size_t G1CollectedHeap::capacity() const { + return _g1_committed.byte_size(); +} + +void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, + int worker_i) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + int n_completed_buffers = 0; + while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) { + n_completed_buffers++; + } + g1_policy()->record_update_rs_processed_buffers(worker_i, + (double) n_completed_buffers); + dcqs.clear_n_completed_buffers(); + // Finish up the queue... + if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i, + g1_rem_set()); + assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); +} + + +// Computes the sum of the storage used by the various regions. + +size_t G1CollectedHeap::used() const { + assert(Heap_lock->owner() != NULL, + "Should be owned on this thread's behalf."); + size_t result = _summary_bytes_used; + if (_cur_alloc_region != NULL) + result += _cur_alloc_region->used(); + return result; +} + +class SumUsedClosure: public HeapRegionClosure { + size_t _used; +public: + SumUsedClosure() : _used(0) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + _used += r->used(); + } + return false; + } + size_t result() { return _used; } +}; + +size_t G1CollectedHeap::recalculate_used() const { + SumUsedClosure blk; + _hrs->iterate(&blk); + return blk.result(); +} + +#ifndef PRODUCT +class SumUsedRegionsClosure: public HeapRegionClosure { + size_t _num; +public: + // _num is set to 1 to account for the popular region + SumUsedRegionsClosure() : _num(G1NumPopularRegions) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous() || r->used() > 0 || r->is_gc_alloc_region()) { + _num += 1; + } + return false; + } + size_t result() { return _num; } +}; + +size_t G1CollectedHeap::recalculate_used_regions() const { + SumUsedRegionsClosure blk; + _hrs->iterate(&blk); + return blk.result(); +} +#endif // PRODUCT + +size_t G1CollectedHeap::unsafe_max_alloc() { + if (_free_regions > 0) return HeapRegion::GrainBytes; + // otherwise, is there space in the current allocation region? + + // We need to store the current allocation region in a local variable + // here. The problem is that this method doesn't take any locks and + // there may be other threads which overwrite the current allocation + // region field. attempt_allocation(), for example, sets it to NULL + // and this can happen *after* the NULL check here but before the call + // to free(), resulting in a SIGSEGV. Note that this doesn't appear + // to be a problem in the optimized build, since the two loads of the + // current allocation region field are optimized away. + HeapRegion* car = _cur_alloc_region; + + // FIXME: should iterate over all regions? 
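// A simplified, standalone sketch of the pattern described in the comment
// above: read the shared field exactly once into a local, then perform both
// the NULL check and the use on that local, so a concurrent writer clearing
// the field in between cannot turn the check-then-use into a crash.  Names
// are hypothetical.
#include <cstddef>

struct Space { size_t free_bytes; };

size_t unsafe_free_estimate(Space* volatile* shared_field) {
  Space* snapshot = *shared_field;   // single read of the shared pointer
  if (snapshot == NULL) {
    return 0;                        // it may already have been cleared
  }
  return snapshot->free_bytes;       // the snapshot we took remains usable
}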
+ if (car == NULL) { + return 0; + } + return car->free(); +} + +void G1CollectedHeap::collect(GCCause::Cause cause) { + // The caller doesn't have the Heap_lock + assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock"); + MutexLocker ml(Heap_lock); + collect_locked(cause); +} + +void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) { + assert(Thread::current()->is_VM_thread(), "Precondition#1"); + assert(Heap_lock->is_locked(), "Precondition#2"); + GCCauseSetter gcs(this, cause); + switch (cause) { + case GCCause::_heap_inspection: + case GCCause::_heap_dump: { + HandleMark hm; + do_full_collection(false); // don't clear all soft refs + break; + } + default: // XXX FIX ME + ShouldNotReachHere(); // Unexpected use of this function + } +} + + +void G1CollectedHeap::collect_locked(GCCause::Cause cause) { + // Don't want to do a GC until cleanup is completed. + wait_for_cleanup_complete(); + + // Read the GC count while holding the Heap_lock + int gc_count_before = SharedHeap::heap()->total_collections(); + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1CollectFull op(gc_count_before, cause); + VMThread::execute(&op); + } +} + +bool G1CollectedHeap::is_in(const void* p) const { + if (_g1_committed.contains(p)) { + HeapRegion* hr = _hrs->addr_to_region(p); + return hr->is_in(p); + } else { + return _perm_gen->as_gen()->is_in(p); + } +} + +// Iteration functions. + +// Iterates an OopClosure over all ref-containing fields of objects +// within a HeapRegion. + +class IterateOopClosureRegionClosure: public HeapRegionClosure { + MemRegion _mr; + OopClosure* _cl; +public: + IterateOopClosureRegionClosure(MemRegion mr, OopClosure* cl) + : _mr(mr), _cl(cl) {} + bool doHeapRegion(HeapRegion* r) { + if (! r->continuesHumongous()) { + r->oop_iterate(_cl); + } + return false; + } +}; + +void G1CollectedHeap::oop_iterate(OopClosure* cl) { + IterateOopClosureRegionClosure blk(_g1_committed, cl); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::oop_iterate(MemRegion mr, OopClosure* cl) { + IterateOopClosureRegionClosure blk(mr, cl); + _hrs->iterate(&blk); +} + +// Iterates an ObjectClosure over all objects within a HeapRegion. + +class IterateObjectClosureRegionClosure: public HeapRegionClosure { + ObjectClosure* _cl; +public: + IterateObjectClosureRegionClosure(ObjectClosure* cl) : _cl(cl) {} + bool doHeapRegion(HeapRegion* r) { + if (! r->continuesHumongous()) { + r->object_iterate(_cl); + } + return false; + } +}; + +void G1CollectedHeap::object_iterate(ObjectClosure* cl) { + IterateObjectClosureRegionClosure blk(cl); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) { + // FIXME: is this right? + guarantee(false, "object_iterate_since_last_GC not supported by G1 heap"); +} + +// Calls a SpaceClosure on a HeapRegion. 
+ +class SpaceClosureRegionClosure: public HeapRegionClosure { + SpaceClosure* _cl; +public: + SpaceClosureRegionClosure(SpaceClosure* cl) : _cl(cl) {} + bool doHeapRegion(HeapRegion* r) { + _cl->do_space(r); + return false; + } +}; + +void G1CollectedHeap::space_iterate(SpaceClosure* cl) { + SpaceClosureRegionClosure blk(cl); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::heap_region_iterate(HeapRegionClosure* cl) { + _hrs->iterate(cl); +} + +void G1CollectedHeap::heap_region_iterate_from(HeapRegion* r, + HeapRegionClosure* cl) { + _hrs->iterate_from(r, cl); +} + +void +G1CollectedHeap::heap_region_iterate_from(int idx, HeapRegionClosure* cl) { + _hrs->iterate_from(idx, cl); +} + +HeapRegion* G1CollectedHeap::region_at(size_t idx) { return _hrs->at(idx); } + +const int OverpartitionFactor = 4; +void +G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl, + int worker, + jint claim_value) { + // We break up the heap regions into blocks of size ParallelGCThreads (to + // decrease iteration costs). + const size_t nregions = n_regions(); + const size_t n_thrds = (ParallelGCThreads > 0 ? ParallelGCThreads : 1); + const size_t partitions = n_thrds * OverpartitionFactor; + const size_t BlkSize = MAX2(nregions/partitions, (size_t)1); + const size_t n_blocks = (nregions + BlkSize - 1)/BlkSize; + assert(ParallelGCThreads > 0 || worker == 0, "Precondition"); + const int init_idx = (int) (n_blocks/n_thrds * worker); + for (size_t blk = 0; blk < n_blocks; blk++) { + size_t idx = init_idx + blk; + if (idx >= n_blocks) idx = idx - n_blocks; + size_t reg_idx = idx * BlkSize; + assert(reg_idx < nregions, "Because we rounded blk up."); + HeapRegion* r = region_at(reg_idx); + if (r->claimHeapRegion(claim_value)) { + for (size_t j = 0; j < BlkSize; j++) { + size_t reg_idx2 = reg_idx + j; + if (reg_idx2 == nregions) break; + HeapRegion* r2 = region_at(reg_idx2); + if (j > 0) r2->set_claim_value(claim_value); + bool res = cl->doHeapRegion(r2); + guarantee(!res, "Should not abort."); + } + } + } +} + +void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) { + HeapRegion* r = g1_policy()->collection_set(); + while (r != NULL) { + HeapRegion* next = r->next_in_collection_set(); + if (cl->doHeapRegion(r)) { + cl->incomplete(); + return; + } + r = next; + } +} + +void G1CollectedHeap::collection_set_iterate_from(HeapRegion* r, + HeapRegionClosure *cl) { + assert(r->in_collection_set(), + "Start region must be a member of the collection set."); + HeapRegion* cur = r; + while (cur != NULL) { + HeapRegion* next = cur->next_in_collection_set(); + if (cl->doHeapRegion(cur) && false) { + cl->incomplete(); + return; + } + cur = next; + } + cur = g1_policy()->collection_set(); + while (cur != r) { + HeapRegion* next = cur->next_in_collection_set(); + if (cl->doHeapRegion(cur) && false) { + cl->incomplete(); + return; + } + cur = next; + } +} + +CompactibleSpace* G1CollectedHeap::first_compactible_space() { + return _hrs->length() > 0 ? 
_hrs->at(0) : NULL;
+}
+
+
+Space* G1CollectedHeap::space_containing(const void* addr) const {
+  Space* res = heap_region_containing(addr);
+  if (res == NULL)
+    res = perm_gen()->space_containing(addr);
+  return res;
+}
+
+HeapWord* G1CollectedHeap::block_start(const void* addr) const {
+  Space* sp = space_containing(addr);
+  if (sp != NULL) {
+    return sp->block_start(addr);
+  }
+  return NULL;
+}
+
+size_t G1CollectedHeap::block_size(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  assert(sp != NULL, "block_size of address outside of heap");
+  return sp->block_size(addr);
+}
+
+bool G1CollectedHeap::block_is_obj(const HeapWord* addr) const {
+  Space* sp = space_containing(addr);
+  return sp->block_is_obj(addr);
+}
+
+bool G1CollectedHeap::supports_tlab_allocation() const {
+  return true;
+}
+
+size_t G1CollectedHeap::tlab_capacity(Thread* ignored) const {
+  return HeapRegion::GrainBytes;
+}
+
+size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
+  // Return the remaining space in the cur alloc region, but not less than
+  // the min TLAB size.
+  // Also, no more than half the region size, since we can't allow tlabs to
+  // grow big enough to accommodate humongous objects.
+
+  // We need to store it locally, since it might change between when we
+  // test for NULL and when we use it later.
+  ContiguousSpace* cur_alloc_space = _cur_alloc_region;
+  if (cur_alloc_space == NULL) {
+    return HeapRegion::GrainBytes/2;
+  } else {
+    return MAX2(MIN2(cur_alloc_space->free(),
+                     (size_t)(HeapRegion::GrainBytes/2)),
+                (size_t)MinTLABSize);
+  }
+}
+
+HeapWord* G1CollectedHeap::allocate_new_tlab(size_t size) {
+  bool dummy;
+  return G1CollectedHeap::mem_allocate(size, false, true, &dummy);
+}
+
+bool G1CollectedHeap::allocs_are_zero_filled() {
+  return false;
+}
+
+size_t G1CollectedHeap::large_typearray_limit() {
+  // FIXME
+  return HeapRegion::GrainBytes/HeapWordSize;
+}
+
+size_t G1CollectedHeap::max_capacity() const {
+  return _g1_committed.byte_size();
+}
+
+jlong G1CollectedHeap::millis_since_last_gc() {
+  // assert(false, "NYI");
+  return 0;
+}
+
+
+void G1CollectedHeap::prepare_for_verify() {
+  if (SafepointSynchronize::is_at_safepoint() || !
UseTLAB) { + ensure_parsability(false); + } + g1_rem_set()->prepare_for_verify(); +} + +class VerifyLivenessOopClosure: public OopClosure { + G1CollectedHeap* g1h; +public: + VerifyLivenessOopClosure(G1CollectedHeap* _g1h) { + g1h = _g1h; + } + void do_oop(narrowOop *p) { + guarantee(false, "NYI"); + } + void do_oop(oop *p) { + oop obj = *p; + assert(obj == NULL || !g1h->is_obj_dead(obj), + "Dead object referenced by a not dead object"); + } +}; + +class VerifyObjsInRegionClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + size_t _live_bytes; + HeapRegion *_hr; +public: + VerifyObjsInRegionClosure(HeapRegion *hr) : _live_bytes(0), _hr(hr) { + _g1h = G1CollectedHeap::heap(); + } + void do_object(oop o) { + VerifyLivenessOopClosure isLive(_g1h); + assert(o != NULL, "Huh?"); + if (!_g1h->is_obj_dead(o)) { + o->oop_iterate(&isLive); + if (!_hr->obj_allocated_since_prev_marking(o)) + _live_bytes += (o->size() * HeapWordSize); + } + } + size_t live_bytes() { return _live_bytes; } +}; + +class PrintObjsInRegionClosure : public ObjectClosure { + HeapRegion *_hr; + G1CollectedHeap *_g1; +public: + PrintObjsInRegionClosure(HeapRegion *hr) : _hr(hr) { + _g1 = G1CollectedHeap::heap(); + }; + + void do_object(oop o) { + if (o != NULL) { + HeapWord *start = (HeapWord *) o; + size_t word_sz = o->size(); + gclog_or_tty->print("\nPrinting obj "PTR_FORMAT" of size " SIZE_FORMAT + " isMarkedPrev %d isMarkedNext %d isAllocSince %d\n", + (void*) o, word_sz, + _g1->isMarkedPrev(o), + _g1->isMarkedNext(o), + _hr->obj_allocated_since_prev_marking(o)); + HeapWord *end = start + word_sz; + HeapWord *cur; + int *val; + for (cur = start; cur < end; cur++) { + val = (int *) cur; + gclog_or_tty->print("\t "PTR_FORMAT":"PTR_FORMAT"\n", val, *val); + } + } + } +}; + +class VerifyRegionClosure: public HeapRegionClosure { +public: + bool _allow_dirty; + VerifyRegionClosure(bool allow_dirty) + : _allow_dirty(allow_dirty) {} + bool doHeapRegion(HeapRegion* r) { + guarantee(r->claim_value() == 0, "Should be unclaimed at verify points."); + if (r->isHumongous()) { + if (r->startsHumongous()) { + // Verify the single H object. + oop(r->bottom())->verify(); + size_t word_sz = oop(r->bottom())->size(); + guarantee(r->top() == r->bottom() + word_sz, + "Only one object in a humongous region"); + } + } else { + VerifyObjsInRegionClosure not_dead_yet_cl(r); + r->verify(_allow_dirty); + r->object_iterate(¬_dead_yet_cl); + guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(), + "More live objects than counted in last complete marking."); + } + return false; + } +}; + +class VerifyRootsClosure: public OopsInGenClosure { +private: + G1CollectedHeap* _g1h; + bool _failures; + +public: + VerifyRootsClosure() : + _g1h(G1CollectedHeap::heap()), _failures(false) { } + + bool failures() { return _failures; } + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + oop obj = *p; + if (obj != NULL) { + if (_g1h->is_obj_dead(obj)) { + gclog_or_tty->print_cr("Root location "PTR_FORMAT" " + "points to dead obj "PTR_FORMAT, p, (void*) obj); + obj->print_on(gclog_or_tty); + _failures = true; + } + } + } +}; + +void G1CollectedHeap::verify(bool allow_dirty, bool silent) { + if (SafepointSynchronize::is_at_safepoint() || ! 
UseTLAB) { + if (!silent) { gclog_or_tty->print("roots "); } + VerifyRootsClosure rootsCl; + process_strong_roots(false, + SharedHeap::SO_AllClasses, + &rootsCl, + &rootsCl); + rem_set()->invalidate(perm_gen()->used_region(), false); + if (!silent) { gclog_or_tty->print("heapRegions "); } + VerifyRegionClosure blk(allow_dirty); + _hrs->iterate(&blk); + if (!silent) gclog_or_tty->print("remset "); + rem_set()->verify(); + guarantee(!rootsCl.failures(), "should not have had failures"); + } else { + if (!silent) gclog_or_tty->print("(SKIPPING roots, heapRegions, remset) "); + } +} + +class PrintRegionClosure: public HeapRegionClosure { + outputStream* _st; +public: + PrintRegionClosure(outputStream* st) : _st(st) {} + bool doHeapRegion(HeapRegion* r) { + r->print_on(_st); + return false; + } +}; + +void G1CollectedHeap::print() const { print_on(gclog_or_tty); } + +void G1CollectedHeap::print_on(outputStream* st) const { + PrintRegionClosure blk(st); + _hrs->iterate(&blk); +} + +void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { + if (ParallelGCThreads > 0) { + workers()->print_worker_threads(); + } + st->print("\"G1 concurrent mark GC Thread\" "); + _cmThread->print(); + st->cr(); + st->print("\"G1 concurrent refinement GC Thread\" "); + _cg1r->cg1rThread()->print_on(st); + st->cr(); + st->print("\"G1 zero-fill GC Thread\" "); + _czft->print_on(st); + st->cr(); +} + +void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { + if (ParallelGCThreads > 0) { + workers()->threads_do(tc); + } + tc->do_thread(_cmThread); + tc->do_thread(_cg1r->cg1rThread()); + tc->do_thread(_czft); +} + +void G1CollectedHeap::print_tracing_info() const { + concurrent_g1_refine()->print_final_card_counts(); + + // We'll overload this to mean "trace GC pause statistics." + if (TraceGen0Time || TraceGen1Time) { + // The "G1CollectorPolicy" is keeping track of these stats, so delegate + // to that. + g1_policy()->print_tracing_info(); + } + if (SummarizeG1RSStats) { + g1_rem_set()->print_summary_info(); + } + if (SummarizeG1ConcMark) { + concurrent_mark()->print_summary_info(); + } + if (SummarizeG1ZFStats) { + ConcurrentZFThread::print_summary_info(); + } + if (G1SummarizePopularity) { + print_popularity_summary_info(); + } + g1_policy()->print_yg_surv_rate_info(); + + GCOverheadReporter::printGCOverhead(); + + SpecializationStats::print(); +} + + +int G1CollectedHeap::addr_to_arena_id(void* addr) const { + HeapRegion* hr = heap_region_containing(addr); + if (hr == NULL) { + return 0; + } else { + return 1; + } +} + +G1CollectedHeap* G1CollectedHeap::heap() { + assert(_sh->kind() == CollectedHeap::G1CollectedHeap, + "not a garbage-first heap"); + return _g1h; +} + +void G1CollectedHeap::gc_prologue(bool full /* Ignored */) { + if (PrintHeapAtGC){ + gclog_or_tty->print_cr(" {Heap before GC collections=%d:", total_collections()); + Universe::print(); + } + assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer"); + // Call allocation profiler + AllocationProfiler::iterate_since_last_gc(); + // Fill TLAB's and such + ensure_parsability(true); +} + +void G1CollectedHeap::gc_epilogue(bool full /* Ignored */) { + // FIXME: what is this about? + // I'm ignoring the "fill_newgen()" call if "alloc_event_enabled" + // is set. 
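+  // The derived pointer table is expected to have been processed (and left
+  // empty) by the time the epilogue runs; the assert below checks that.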
+ COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(), + "derived pointer present")); + + if (PrintHeapAtGC){ + gclog_or_tty->print_cr(" Heap after GC collections=%d:", total_collections()); + Universe::print(); + gclog_or_tty->print("} "); + } +} + +void G1CollectedHeap::do_collection_pause() { + // Read the GC count while holding the Heap_lock + // we need to do this _before_ wait_for_cleanup_complete(), to + // ensure that we do not give up the heap lock and potentially + // pick up the wrong count + int gc_count_before = SharedHeap::heap()->total_collections(); + + // Don't want to do a GC pause while cleanup is being completed! + wait_for_cleanup_complete(); + + g1_policy()->record_stop_world_start(); + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1IncCollectionPause op(gc_count_before); + VMThread::execute(&op); + } +} + +void +G1CollectedHeap::doConcurrentMark() { + if (G1ConcMark) { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + if (!_cmThread->in_progress()) { + _cmThread->set_started(); + CGC_lock->notify(); + } + } +} + +class VerifyMarkedObjsClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + public: + VerifyMarkedObjsClosure(G1CollectedHeap* g1h) : _g1h(g1h) {} + void do_object(oop obj) { + assert(obj->mark()->is_marked() ? !_g1h->is_obj_dead(obj) : true, + "markandsweep mark should agree with concurrent deadness"); + } +}; + +void +G1CollectedHeap::checkConcurrentMark() { + VerifyMarkedObjsClosure verifycl(this); + doConcurrentMark(); + // MutexLockerEx x(getMarkBitMapLock(), + // Mutex::_no_safepoint_check_flag); + object_iterate(&verifycl); +} + +void G1CollectedHeap::do_sync_mark() { + _cm->checkpointRootsInitial(); + _cm->markFromRoots(); + _cm->checkpointRootsFinal(false); +} + +// + +double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr, + bool young) { + return _g1_policy->predict_region_elapsed_time_ms(hr, young); +} + +void G1CollectedHeap::check_if_region_is_too_expensive(double + predicted_time_ms) { + _g1_policy->check_if_region_is_too_expensive(predicted_time_ms); +} + +size_t G1CollectedHeap::pending_card_num() { + size_t extra_cards = 0; + JavaThread *curr = Threads::first(); + while (curr != NULL) { + DirtyCardQueue& dcq = curr->dirty_card_queue(); + extra_cards += dcq.size(); + curr = curr->next(); + } + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + size_t buffer_size = dcqs.buffer_size(); + size_t buffer_num = dcqs.completed_buffers_num(); + return buffer_size * buffer_num + extra_cards; +} + +size_t G1CollectedHeap::max_pending_card_num() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + size_t buffer_size = dcqs.buffer_size(); + size_t buffer_num = dcqs.completed_buffers_num(); + int thread_num = Threads::number_of_threads(); + return (buffer_num + thread_num) * buffer_size; +} + +size_t G1CollectedHeap::cards_scanned() { + HRInto_G1RemSet* g1_rset = (HRInto_G1RemSet*) g1_rem_set(); + return g1_rset->cardsScanned(); +} + +void +G1CollectedHeap::setup_surviving_young_words() { + guarantee( _surviving_young_words == NULL, "pre-condition" ); + size_t array_length = g1_policy()->young_cset_length(); + _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length); + if (_surviving_young_words == NULL) { + vm_exit_out_of_memory(sizeof(size_t) * array_length, + "Not enough space for young surv words summary."); + } + memset(_surviving_young_words, 0, array_length * sizeof(size_t)); + for (size_t i = 0; i < array_length; ++i) { + guarantee( 
_surviving_young_words[i] == 0, "invariant" ); + } +} + +void +G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + size_t array_length = g1_policy()->young_cset_length(); + for (size_t i = 0; i < array_length; ++i) + _surviving_young_words[i] += surv_young_words[i]; +} + +void +G1CollectedHeap::cleanup_surviving_young_words() { + guarantee( _surviving_young_words != NULL, "pre-condition" ); + FREE_C_HEAP_ARRAY(size_t, _surviving_young_words); + _surviving_young_words = NULL; +} + +// + +void +G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { + char verbose_str[128]; + sprintf(verbose_str, "GC pause "); + if (popular_region != NULL) + strcat(verbose_str, "(popular)"); + else if (g1_policy()->in_young_gc_mode()) { + if (g1_policy()->full_young_gcs()) + strcat(verbose_str, "(young)"); + else + strcat(verbose_str, "(partial)"); + } + bool reset_should_initiate_conc_mark = false; + if (popular_region != NULL && g1_policy()->should_initiate_conc_mark()) { + // we currently do not allow an initial mark phase to be piggy-backed + // on a popular pause + reset_should_initiate_conc_mark = true; + g1_policy()->unset_should_initiate_conc_mark(); + } + if (g1_policy()->should_initiate_conc_mark()) + strcat(verbose_str, " (initial-mark)"); + + GCCauseSetter x(this, (popular_region == NULL ? + GCCause::_g1_inc_collection_pause : + GCCause::_g1_pop_region_collection_pause)); + + // if PrintGCDetails is on, we'll print long statistics information + // in the collector policy code, so let's not print this as the output + // is messy if we do. + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty); + + ResourceMark rm; + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); + assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread"); + guarantee(!is_gc_active(), "collection is not reentrant"); + assert(regions_accounted_for(), "Region leakage!"); + ++_gc_time_stamp; + + if (g1_policy()->in_young_gc_mode()) { + assert(check_young_list_well_formed(), + "young list should be well formed"); + } + + if (GC_locker::is_active()) { + return; // GC is disabled (e.g. JNI GetXXXCritical operation) + } + + bool abandoned = false; + { // Call to jvmpi::post_class_unload_events must occur outside of active GC + IsGCActiveMark x; + + gc_prologue(false); + increment_total_collections(); + +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("\nJust chose CS, heap:"); + print(); +#endif + + if (VerifyBeforeGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + prepare_for_verify(); + gclog_or_tty->print(" VerifyBeforeGC:"); + Universe::verify(false); + } + + COMPILER2_PRESENT(DerivedPointerTable::clear()); + + // We want to turn off ref discovere, if necessary, and turn it back on + // on again later if we do. + bool was_enabled = ref_processor()->discovery_enabled(); + if (was_enabled) ref_processor()->disable_discovery(); + + // Forget the current alloc region (we might even choose it to be part + // of the collection set!). + abandon_cur_alloc_region(); + + // The elapsed time induced by the start time below deliberately elides + // the possible verification above. 
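+    // The start time below is fed both to the GC overhead reporter and to
+    // the policy's pause-time bookkeeping.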
+    double start_time_sec = os::elapsedTime();
+    GCOverheadReporter::recordSTWStart(start_time_sec);
+    size_t start_used_bytes = used();
+    if (!G1ConcMark) {
+      do_sync_mark();
+    }
+
+    g1_policy()->record_collection_pause_start(start_time_sec,
+                                               start_used_bytes);
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    if (g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPre();
+    }
+    save_marks();
+
+    // We must do this before any possible evacuation that should propagate
+    // marks, including evacuation of popular objects in a popular pause.
+    if (mark_in_progress()) {
+      double start_time_sec = os::elapsedTime();
+
+      _cm->drainAllSATBBuffers();
+      double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
+      g1_policy()->record_satb_drain_time(finish_mark_ms);
+
+    }
+    // Record the number of elements currently on the mark stack, so we
+    // only iterate over these. (Since evacuation may add to the mark
+    // stack, doing more exposes race conditions.) If no mark is in
+    // progress, this will be zero.
+    _cm->set_oops_do_bound();
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    bool abandoned = false;
+
+    if (mark_in_progress())
+      concurrent_mark()->newCSet();
+
+    // Now choose the CS.
+    if (popular_region == NULL) {
+      g1_policy()->choose_collection_set();
+    } else {
+      // We may be evacuating a single region (for popularity).
+      g1_policy()->record_popular_pause_preamble_start();
+      popularity_pause_preamble(popular_region);
+      g1_policy()->record_popular_pause_preamble_end();
+      abandoned = (g1_policy()->collection_set() == NULL);
+      // Now we allow more regions to be added (we have to collect
+      // all popular regions).
+      if (!abandoned) {
+        g1_policy()->choose_collection_set(popular_region);
+      }
+    }
+    // We may abandon a pause if we find no region that will fit in the MMU
+    // pause.
+    abandoned = (g1_policy()->collection_set() == NULL);
+
+    // Nothing to do if we were unable to choose a collection set.
+    if (!abandoned) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("\nAfter pause, heap:");
+      print();
+#endif
+
+      setup_surviving_young_words();
+
+      // Set up the gc allocation regions.
+      get_gc_alloc_regions();
+
+      // Actually do the work...
+      evacuate_collection_set();
+      free_collection_set(g1_policy()->collection_set());
+      g1_policy()->clear_collection_set();
+
+      if (popular_region != NULL) {
+        // We have to wait until now, because we don't want the region to
+        // be rescheduled for pop-evac during RS update.
+        popular_region->set_popular_pending(false);
+      }
+
+      release_gc_alloc_regions();
+
+      cleanup_surviving_young_words();
+
+      if (g1_policy()->in_young_gc_mode()) {
+        _young_list->reset_sampled_info();
+        assert(check_young_list_empty(true),
+               "young list should be empty");
+
+#if SCAN_ONLY_VERBOSE
+        _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+        _young_list->reset_auxilary_lists();
+      }
+    } else {
+      COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
+    }
+
+    if (evacuation_failed()) {
+      _summary_bytes_used = recalculate_used();
+    } else {
+      // The "used" of the collection set has already been subtracted
+      // when the regions were freed. Add in the bytes evacuated.
+      _summary_bytes_used += g1_policy()->bytes_in_to_space();
+    }
+
+    if (g1_policy()->in_young_gc_mode() &&
+        g1_policy()->should_initiate_conc_mark()) {
+      concurrent_mark()->checkpointRootsInitialPost();
+      set_marking_started();
+      doConcurrentMark();
+    }
+
+#if SCAN_ONLY_VERBOSE
+    _young_list->print();
+#endif // SCAN_ONLY_VERBOSE
+
+    double end_time_sec = os::elapsedTime();
+    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    GCOverheadReporter::recordSTWEnd(end_time_sec);
+    g1_policy()->record_collection_pause_end(popular_region != NULL,
+                                             abandoned);
+
+    assert(regions_accounted_for(), "Region leakage.");
+
+    if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
+      HandleMark hm;  // Discard invalid handles created during verification
+      gclog_or_tty->print(" VerifyAfterGC:");
+      Universe::verify(false);
+    }
+
+    if (was_enabled) ref_processor()->enable_discovery();
+
+    {
+      size_t expand_bytes = g1_policy()->expansion_amount();
+      if (expand_bytes > 0) {
+        size_t bytes_before = capacity();
+        expand(expand_bytes);
+      }
+    }
+
+    if (mark_in_progress())
+      concurrent_mark()->update_g1_committed();
+
+    gc_epilogue(false);
+  }
+
+  assert(verify_region_lists(), "Bad region lists.");
+
+  if (reset_should_initiate_conc_mark)
+    g1_policy()->set_should_initiate_conc_mark();
+
+  if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
+    gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
+    print_tracing_info();
+    vm_exit(-1);
+  }
+}
+
+void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
+  assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
+  HeapWord* original_top = NULL;
+  if (r != NULL)
+    original_top = r->top();
+
+  // We will want to record the used space in r as being there before gc.
+  // Once we install it as a GC alloc region it's eligible for allocation.
+  // So record it now and use it later.
+  size_t r_used = 0;
+  if (r != NULL) {
+    r_used = r->used();
+
+    if (ParallelGCThreads > 0) {
+      // need to take the lock to guard against two threads calling
+      // get_gc_alloc_region concurrently (very unlikely but...)
+      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+      r->save_marks();
+    }
+  }
+  HeapRegion* old_alloc_region = _gc_alloc_regions[purpose];
+  _gc_alloc_regions[purpose] = r;
+  if (old_alloc_region != NULL) {
+    // Replace aliases too.
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      if (_gc_alloc_regions[ap] == old_alloc_region) {
+        _gc_alloc_regions[ap] = r;
+      }
+    }
+  }
+  if (r != NULL) {
+    push_gc_alloc_region(r);
+    if (mark_in_progress() && original_top != r->next_top_at_mark_start()) {
+      // We are using a region as a GC alloc region after it has been used
+      // as a mutator allocation region during the current marking cycle.
+      // The mutator-allocated objects are currently implicitly marked, but
+      // when we move hr->next_top_at_mark_start() forward at the end
+      // of the GC pause, they won't be. We therefore mark all objects in
+      // the "gap". We do this object-by-object, since marking densely
+      // does not currently work right with marking bitmap iteration. This
+      // means we rely on TLAB filling at the start of pauses, and no
+      // "resuscitation" of filled TLAB's. If we want to do this, we need
+      // to fix the marking bitmap iteration.
+      HeapWord* curhw = r->next_top_at_mark_start();
+      HeapWord* t = original_top;
+
+      while (curhw < t) {
+        oop cur = (oop)curhw;
+        // We'll assume parallel for generality. This is rare code.
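+        // Mark each object in the gap individually, advancing by the
+        // object's size.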
+ concurrent_mark()->markAndGrayObjectIfNecessary(cur); // can't we just mark them? + curhw = curhw + cur->size(); + } + assert(curhw == t, "Should have parsed correctly."); + } + if (G1PolicyVerbose > 1) { + gclog_or_tty->print("New alloc region ["PTR_FORMAT", "PTR_FORMAT", " PTR_FORMAT") " + "for survivors:", r->bottom(), original_top, r->end()); + r->print(); + } + g1_policy()->record_before_bytes(r_used); + } +} + +void G1CollectedHeap::push_gc_alloc_region(HeapRegion* hr) { + assert(Thread::current()->is_VM_thread() || + par_alloc_during_gc_lock()->owned_by_self(), "Precondition"); + assert(!hr->is_gc_alloc_region() && !hr->in_collection_set(), + "Precondition."); + hr->set_is_gc_alloc_region(true); + hr->set_next_gc_alloc_region(_gc_alloc_region_list); + _gc_alloc_region_list = hr; +} + +#ifdef G1_DEBUG +class FindGCAllocRegion: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + if (r->is_gc_alloc_region()) { + gclog_or_tty->print_cr("Region %d ["PTR_FORMAT"...] is still a gc_alloc_region.", + r->hrs_index(), r->bottom()); + } + return false; + } +}; +#endif // G1_DEBUG + +void G1CollectedHeap::forget_alloc_region_list() { + assert(Thread::current()->is_VM_thread(), "Precondition"); + while (_gc_alloc_region_list != NULL) { + HeapRegion* r = _gc_alloc_region_list; + assert(r->is_gc_alloc_region(), "Invariant."); + _gc_alloc_region_list = r->next_gc_alloc_region(); + r->set_next_gc_alloc_region(NULL); + r->set_is_gc_alloc_region(false); + if (r->is_empty()) { + ++_free_regions; + } + } +#ifdef G1_DEBUG + FindGCAllocRegion fa; + heap_region_iterate(&fa); +#endif // G1_DEBUG +} + + +bool G1CollectedHeap::check_gc_alloc_regions() { + // TODO: allocation regions check + return true; +} + +void G1CollectedHeap::get_gc_alloc_regions() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + // Create new GC alloc regions. + HeapRegion* alloc_region = _gc_alloc_regions[ap]; + // Clear this alloc region, so that in case it turns out to be + // unacceptable, we end up with no allocation region, rather than a bad + // one. + _gc_alloc_regions[ap] = NULL; + if (alloc_region == NULL || alloc_region->in_collection_set()) { + // Can't re-use old one. Allocate a new one. + alloc_region = newAllocRegionWithExpansion(ap, 0); + } + if (alloc_region != NULL) { + set_gc_alloc_region(ap, alloc_region); + } + } + // Set alternative regions for allocation purposes that have reached + // thier limit. + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap); + if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) { + _gc_alloc_regions[ap] = _gc_alloc_regions[alt_purpose]; + } + } + assert(check_gc_alloc_regions(), "alloc regions messed up"); +} + +void G1CollectedHeap::release_gc_alloc_regions() { + // We keep a separate list of all regions that have been alloc regions in + // the current collection pause. Forget that now. + forget_alloc_region_list(); + + // The current alloc regions contain objs that have survived + // collection. Make them no longer GC alloc regions. 
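+  // Retire the GC alloc region for each purpose; a region that ended up
+  // completely empty is returned to the free list.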
+ for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + if (r != NULL && r->is_empty()) { + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + r->set_zero_fill_complete(); + put_free_region_on_list_locked(r); + } + } + // set_gc_alloc_region will also NULLify all aliases to the region + set_gc_alloc_region(ap, NULL); + _gc_alloc_region_counts[ap] = 0; + } +} + +void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) { + _drain_in_progress = false; + set_evac_failure_closure(cl); + _evac_failure_scan_stack = new (ResourceObj::C_HEAP) GrowableArray(40, true); +} + +void G1CollectedHeap::finalize_for_evac_failure() { + assert(_evac_failure_scan_stack != NULL && + _evac_failure_scan_stack->length() == 0, + "Postcondition"); + assert(!_drain_in_progress, "Postcondition"); + // Don't have to delete, since the scan stack is a resource object. + _evac_failure_scan_stack = NULL; +} + + + +// *** Sequential G1 Evacuation + +HeapWord* G1CollectedHeap::allocate_during_gc(GCAllocPurpose purpose, size_t word_size) { + HeapRegion* alloc_region = _gc_alloc_regions[purpose]; + // let the caller handle alloc failure + if (alloc_region == NULL) return NULL; + assert(isHumongous(word_size) || !alloc_region->isHumongous(), + "Either the object is humongous or the region isn't"); + HeapWord* block = alloc_region->allocate(word_size); + if (block == NULL) { + block = allocate_during_gc_slow(purpose, alloc_region, false, word_size); + } + return block; +} + +class G1IsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; +public: + G1IsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_object(oop p) { assert(false, "Do not call."); } + bool do_object_b(oop p) { + // It is reachable if it is outside the collection set, or is inside + // and forwarded. 
+ +#ifdef G1_DEBUG + gclog_or_tty->print_cr("is alive "PTR_FORMAT" in CS %d forwarded %d overall %d", + (void*) p, _g1->obj_in_cs(p), p->is_forwarded(), + !_g1->obj_in_cs(p) || p->is_forwarded()); +#endif // G1_DEBUG + + return !_g1->obj_in_cs(p) || p->is_forwarded(); + } +}; + +class G1KeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; +public: + G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; +#ifdef G1_DEBUG + if (PrintGC && Verbose) { + gclog_or_tty->print_cr("keep alive *"PTR_FORMAT" = "PTR_FORMAT" "PTR_FORMAT, + p, (void*) obj, (void*) *p); + } +#endif // G1_DEBUG + + if (_g1->obj_in_cs(obj)) { + assert( obj->is_forwarded(), "invariant" ); + *p = obj->forwardee(); + +#ifdef G1_DEBUG + gclog_or_tty->print_cr(" in CSet: moved "PTR_FORMAT" -> "PTR_FORMAT, + (void*) obj, (void*) *p); +#endif // G1_DEBUG + } + } +}; + +class RecreateRSetEntriesClosure: public OopClosure { +private: + G1CollectedHeap* _g1; + G1RemSet* _g1_rem_set; + HeapRegion* _from; +public: + RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) : + _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from) + {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + assert(_from->is_in_reserved(p), "paranoia"); + if (*p != NULL) { + _g1_rem_set->write_ref(_from, p); + } + } +}; + +class RemoveSelfPointerClosure: public ObjectClosure { +private: + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + HeapRegion* _hr; + HeapWord* _last_self_forwarded_end; + size_t _prev_marked_bytes; + size_t _next_marked_bytes; +public: + RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) : + _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr), + _last_self_forwarded_end(_hr->bottom()), + _prev_marked_bytes(0), _next_marked_bytes(0) + {} + + size_t prev_marked_bytes() { return _prev_marked_bytes; } + size_t next_marked_bytes() { return _next_marked_bytes; } + + void fill_remainder() { + HeapWord* limit = _hr->top(); + MemRegion mr(_last_self_forwarded_end, limit); + if (!mr.is_empty()) { + SharedHeap::fill_region_with_object(mr); + _cm->clearRangeBothMaps(mr); + _hr->declare_filled_region_to_BOT(mr); + } + } + + void do_object(oop obj) { + if (obj->is_forwarded()) { + if (obj->forwardee() == obj) { + assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs."); + _cm->markPrev(obj); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + _prev_marked_bytes += (obj->size() * HeapWordSize); + if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) { + _cm->markAndGrayObjectIfNecessary(obj); + } + HeapWord* obj_start = (HeapWord*)obj; + if (obj_start > _last_self_forwarded_end) { + MemRegion mr(_last_self_forwarded_end, obj_start); + SharedHeap::fill_region_with_object(mr); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + _cm->clearRangeBothMaps(mr); + assert(_cm->isPrevMarked(obj), "Should be marked!"); + _hr->declare_filled_region_to_BOT(mr); + } + _last_self_forwarded_end = obj_start + obj->size(); + obj->set_mark(markOopDesc::prototype()); + + // While we were processing RSet buffers during the + // collection, we actually didn't scan any cards on the + // collection set, since we didn't want to update remebered + // sets with entries that point into the collection set, given + // that live objects fromthe collection set are about to move + // and such entries will be stale very soon. 
This change also + // dealt with a reliability issue which involved scanning a + // card in the collection set and coming across an array that + // was being chunked and looking malformed. The problem is + // that, if evacuation fails, we might have remembered set + // entries missing given that we skipped cards on the + // collection set. So, we'll recreate such entries now. + RecreateRSetEntriesClosure cl(_g1, _hr); + obj->oop_iterate(&cl); + + assert(_cm->isPrevMarked(obj), "Should be marked!"); + } + } + } +}; + +void G1CollectedHeap::remove_self_forwarding_pointers() { + HeapRegion* cur = g1_policy()->collection_set(); + + while (cur != NULL) { + assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + + if (cur->evacuation_failed()) { + RemoveSelfPointerClosure rspc(_g1h, cur); + assert(cur->in_collection_set(), "bad CS"); + cur->object_iterate(&rspc); + rspc.fill_remainder(); + + // A number of manipulations to make the TAMS be the current top, + // and the marked bytes be the ones observed in the iteration. + if (_g1h->concurrent_mark()->at_least_one_mark_complete()) { + // The comments below are the postconditions achieved by the + // calls. Note especially the last such condition, which says that + // the count of marked bytes has been properly restored. + cur->note_start_of_marking(false); + // _next_top_at_mark_start == top, _next_marked_bytes == 0 + cur->add_to_marked_bytes(rspc.prev_marked_bytes()); + // _next_marked_bytes == prev_marked_bytes. + cur->note_end_of_marking(); + // _prev_top_at_mark_start == top(), + // _prev_marked_bytes == prev_marked_bytes + } + // If there is no mark in progress, we modified the _next variables + // above needlessly, but harmlessly. + if (_g1h->mark_in_progress()) { + cur->note_start_of_marking(false); + // _next_top_at_mark_start == top, _next_marked_bytes == 0 + // _next_marked_bytes == next_marked_bytes. + } + + // Now make sure the region has the right index in the sorted array. + g1_policy()->note_change_in_marked_bytes(cur); + } + cur = cur->next_in_collection_set(); + } + assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + + // Now restore saved marks, if any. + if (_objs_with_preserved_marks != NULL) { + assert(_preserved_marks_of_objs != NULL, "Both or none."); + assert(_objs_with_preserved_marks->length() == + _preserved_marks_of_objs->length(), "Both or none."); + guarantee(_objs_with_preserved_marks->length() == + _preserved_marks_of_objs->length(), "Both or none."); + for (int i = 0; i < _objs_with_preserved_marks->length(); i++) { + oop obj = _objs_with_preserved_marks->at(i); + markOop m = _preserved_marks_of_objs->at(i); + obj->set_mark(m); + } + // Delete the preserved marks growable arrays (allocated on the C heap). 
+ delete _objs_with_preserved_marks; + delete _preserved_marks_of_objs; + _objs_with_preserved_marks = NULL; + _preserved_marks_of_objs = NULL; + } +} + +void G1CollectedHeap::push_on_evac_failure_scan_stack(oop obj) { + _evac_failure_scan_stack->push(obj); +} + +void G1CollectedHeap::drain_evac_failure_scan_stack() { + assert(_evac_failure_scan_stack != NULL, "precondition"); + + while (_evac_failure_scan_stack->length() > 0) { + oop obj = _evac_failure_scan_stack->pop(); + _evac_failure_closure->set_region(heap_region_containing(obj)); + obj->oop_iterate_backwards(_evac_failure_closure); + } +} + +void G1CollectedHeap::handle_evacuation_failure(oop old) { + markOop m = old->mark(); + // forward to self + assert(!old->is_forwarded(), "precondition"); + + old->forward_to(old); + handle_evacuation_failure_common(old, m); +} + +oop +G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, + oop old) { + markOop m = old->mark(); + oop forward_ptr = old->forward_to_atomic(old); + if (forward_ptr == NULL) { + // Forward-to-self succeeded. + if (_evac_failure_closure != cl) { + MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag); + assert(!_drain_in_progress, + "Should only be true while someone holds the lock."); + // Set the global evac-failure closure to the current thread's. + assert(_evac_failure_closure == NULL, "Or locking has failed."); + set_evac_failure_closure(cl); + // Now do the common part. + handle_evacuation_failure_common(old, m); + // Reset to NULL. + set_evac_failure_closure(NULL); + } else { + // The lock is already held, and this is recursive. + assert(_drain_in_progress, "This should only be the recursive case."); + handle_evacuation_failure_common(old, m); + } + return old; + } else { + // Someone else had a place to copy it. 
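+    // Another thread installed a forwarding pointer first; return the winner.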
+ return forward_ptr; + } +} + +void G1CollectedHeap::handle_evacuation_failure_common(oop old, markOop m) { + set_evacuation_failed(true); + + preserve_mark_if_necessary(old, m); + + HeapRegion* r = heap_region_containing(old); + if (!r->evacuation_failed()) { + r->set_evacuation_failed(true); + if (G1TraceRegions) { + gclog_or_tty->print("evacuation failed in heap region "PTR_FORMAT" " + "["PTR_FORMAT","PTR_FORMAT")\n", + r, r->bottom(), r->end()); + } + } + + push_on_evac_failure_scan_stack(old); + + if (!_drain_in_progress) { + // prevent recursion in copy_to_survivor_space() + _drain_in_progress = true; + drain_evac_failure_scan_stack(); + _drain_in_progress = false; + } +} + +void G1CollectedHeap::preserve_mark_if_necessary(oop obj, markOop m) { + if (m != markOopDesc::prototype()) { + if (_objs_with_preserved_marks == NULL) { + assert(_preserved_marks_of_objs == NULL, "Both or none."); + _objs_with_preserved_marks = + new (ResourceObj::C_HEAP) GrowableArray(40, true); + _preserved_marks_of_objs = + new (ResourceObj::C_HEAP) GrowableArray(40, true); + } + _objs_with_preserved_marks->push(obj); + _preserved_marks_of_objs->push(m); + } +} + +// *** Parallel G1 Evacuation + +HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose, + size_t word_size) { + HeapRegion* alloc_region = _gc_alloc_regions[purpose]; + // let the caller handle alloc failure + if (alloc_region == NULL) return NULL; + + HeapWord* block = alloc_region->par_allocate(word_size); + if (block == NULL) { + MutexLockerEx x(par_alloc_during_gc_lock(), + Mutex::_no_safepoint_check_flag); + block = allocate_during_gc_slow(purpose, alloc_region, true, word_size); + } + return block; +} + +HeapWord* +G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose, + HeapRegion* alloc_region, + bool par, + size_t word_size) { + HeapWord* block = NULL; + // In the parallel case, a previous thread to obtain the lock may have + // already assigned a new gc_alloc_region. + if (alloc_region != _gc_alloc_regions[purpose]) { + assert(par, "But should only happen in parallel case."); + alloc_region = _gc_alloc_regions[purpose]; + if (alloc_region == NULL) return NULL; + block = alloc_region->par_allocate(word_size); + if (block != NULL) return block; + // Otherwise, continue; this new region is empty, too. + } + assert(alloc_region != NULL, "We better have an allocation region"); + // Another thread might have obtained alloc_region for the given + // purpose, and might be attempting to allocate in it, and might + // succeed. Therefore, we can't do the "finalization" stuff on the + // region below until we're sure the last allocation has happened. + // We ensure this by allocating the remaining space with a garbage + // object. + if (par) par_allocate_remaining_space(alloc_region); + // Now we can do the post-GC stuff on the region. + alloc_region->note_end_of_copying(); + g1_policy()->record_after_bytes(alloc_region->used()); + + if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) { + // Cannot allocate more regions for the given purpose. + GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(purpose); + // Is there an alternative? + if (purpose != alt_purpose) { + HeapRegion* alt_region = _gc_alloc_regions[alt_purpose]; + // Has not the alternative region been aliased? + if (alloc_region != alt_region) { + // Try to allocate in the alternative region. 
+ if (par) { + block = alt_region->par_allocate(word_size); + } else { + block = alt_region->allocate(word_size); + } + // Make an alias. + _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose]; + } + if (block != NULL) { + return block; + } + // Both the allocation region and the alternative one are full + // and aliased, replace them with a new allocation region. + purpose = alt_purpose; + } else { + set_gc_alloc_region(purpose, NULL); + return NULL; + } + } + + // Now allocate a new region for allocation. + alloc_region = newAllocRegionWithExpansion(purpose, word_size, false /*zero_filled*/); + + // let the caller handle alloc failure + if (alloc_region != NULL) { + + assert(check_gc_alloc_regions(), "alloc regions messed up"); + assert(alloc_region->saved_mark_at_top(), + "Mark should have been saved already."); + // We used to assert that the region was zero-filled here, but no + // longer. + + // This must be done last: once it's installed, other regions may + // allocate in it (without holding the lock.) + set_gc_alloc_region(purpose, alloc_region); + + if (par) { + block = alloc_region->par_allocate(word_size); + } else { + block = alloc_region->allocate(word_size); + } + // Caller handles alloc failure. + } else { + // This sets other apis using the same old alloc region to NULL, also. + set_gc_alloc_region(purpose, NULL); + } + return block; // May be NULL. +} + +void G1CollectedHeap::par_allocate_remaining_space(HeapRegion* r) { + HeapWord* block = NULL; + size_t free_words; + do { + free_words = r->free()/HeapWordSize; + // If there's too little space, no one can allocate, so we're done. + if (free_words < (size_t)oopDesc::header_size()) return; + // Otherwise, try to claim it. + block = r->par_allocate(free_words); + } while (block == NULL); + SharedHeap::fill_region_with_object(MemRegion(block, free_words)); +} + +#define use_local_bitmaps 1 +#define verify_local_bitmaps 0 + +#ifndef PRODUCT + +class GCLabBitMap; +class GCLabBitMapClosure: public BitMapClosure { +private: + ConcurrentMark* _cm; + GCLabBitMap* _bitmap; + +public: + GCLabBitMapClosure(ConcurrentMark* cm, + GCLabBitMap* bitmap) { + _cm = cm; + _bitmap = bitmap; + } + + virtual bool do_bit(size_t offset); +}; + +#endif // PRODUCT + +#define oop_buffer_length 256 + +class GCLabBitMap: public BitMap { +private: + ConcurrentMark* _cm; + + int _shifter; + size_t _bitmap_word_covers_words; + + // beginning of the heap + HeapWord* _heap_start; + + // this is the actual start of the GCLab + HeapWord* _real_start_word; + + // this is the actual end of the GCLab + HeapWord* _real_end_word; + + // this is the first word, possibly located before the actual start + // of the GCLab, that corresponds to the first bit of the bitmap + HeapWord* _start_word; + + // size of a GCLab in words + size_t _gclab_word_size; + + static int shifter() { + return MinObjAlignment - 1; + } + + // how many heap words does a single bitmap word corresponds to? + static size_t bitmap_word_covers_words() { + return BitsPerWord << shifter(); + } + + static size_t gclab_word_size() { + return ParallelGCG1AllocBufferSize / HeapWordSize; + } + + static size_t bitmap_size_in_bits() { + size_t bits_in_bitmap = gclab_word_size() >> shifter(); + // We are going to ensure that the beginning of a word in this + // bitmap also corresponds to the beginning of a word in the + // global marking bitmap. To handle the case where a GCLab + // starts from the middle of the bitmap, we need to add enough + // space (i.e. 
up to a bitmap word) to ensure that we have + // enough bits in the bitmap. + return bits_in_bitmap + BitsPerWord - 1; + } +public: + GCLabBitMap(HeapWord* heap_start) + : BitMap(bitmap_size_in_bits()), + _cm(G1CollectedHeap::heap()->concurrent_mark()), + _shifter(shifter()), + _bitmap_word_covers_words(bitmap_word_covers_words()), + _heap_start(heap_start), + _gclab_word_size(gclab_word_size()), + _real_start_word(NULL), + _real_end_word(NULL), + _start_word(NULL) + { + guarantee( size_in_words() >= bitmap_size_in_words(), + "just making sure"); + } + + inline unsigned heapWordToOffset(HeapWord* addr) { + unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter; + assert(offset < size(), "offset should be within bounds"); + return offset; + } + + inline HeapWord* offsetToHeapWord(size_t offset) { + HeapWord* addr = _start_word + (offset << _shifter); + assert(_real_start_word <= addr && addr < _real_end_word, "invariant"); + return addr; + } + + bool fields_well_formed() { + bool ret1 = (_real_start_word == NULL) && + (_real_end_word == NULL) && + (_start_word == NULL); + if (ret1) + return true; + + bool ret2 = _real_start_word >= _start_word && + _start_word < _real_end_word && + (_real_start_word + _gclab_word_size) == _real_end_word && + (_start_word + _gclab_word_size + _bitmap_word_covers_words) + > _real_end_word; + return ret2; + } + + inline bool mark(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + if (addr >= _real_start_word && addr < _real_end_word) { + assert(!isMarked(addr), "should not have already been marked"); + + // first mark it on the bitmap + at_put(heapWordToOffset(addr), true); + + return true; + } else { + return false; + } + } + + inline bool isMarked(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + return at(heapWordToOffset(addr)); + } + + void set_buffer(HeapWord* start) { + guarantee(use_local_bitmaps, "invariant"); + clear(); + + assert(start != NULL, "invariant"); + _real_start_word = start; + _real_end_word = start + _gclab_word_size; + + size_t diff = + pointer_delta(start, _heap_start) % _bitmap_word_covers_words; + _start_word = start - diff; + + assert(fields_well_formed(), "invariant"); + } + +#ifndef PRODUCT + void verify() { + // verify that the marks have been propagated + GCLabBitMapClosure cl(_cm, this); + iterate(&cl); + } +#endif // PRODUCT + + void retire() { + guarantee(use_local_bitmaps, "invariant"); + assert(fields_well_formed(), "invariant"); + + if (_start_word != NULL) { + CMBitMap* mark_bitmap = _cm->nextMarkBitMap(); + + // this means that the bitmap was set up for the GCLab + assert(_real_start_word != NULL && _real_end_word != NULL, "invariant"); + + mark_bitmap->mostly_disjoint_range_union(this, + 0, // always start from the start of the bitmap + _start_word, + size_in_words()); + _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word)); + +#ifndef PRODUCT + if (use_local_bitmaps && verify_local_bitmaps) + verify(); +#endif // PRODUCT + } else { + assert(_real_start_word == NULL && _real_end_word == NULL, "invariant"); + } + } + + static size_t bitmap_size_in_words() { + return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord; + } +}; + +#ifndef PRODUCT + +bool GCLabBitMapClosure::do_bit(size_t offset) { + HeapWord* addr = _bitmap->offsetToHeapWord(offset); + guarantee(_cm->isMarked(oop(addr)), "it should be!"); + return true; +} + +#endif // PRODUCT + +class 
G1ParGCAllocBuffer: public ParGCAllocBuffer { +private: + bool _retired; + bool _during_marking; + GCLabBitMap _bitmap; + +public: + G1ParGCAllocBuffer() : + ParGCAllocBuffer(ParallelGCG1AllocBufferSize / HeapWordSize), + _during_marking(G1CollectedHeap::heap()->mark_in_progress()), + _bitmap(G1CollectedHeap::heap()->reserved_region().start()), + _retired(false) + { } + + inline bool mark(HeapWord* addr) { + guarantee(use_local_bitmaps, "invariant"); + assert(_during_marking, "invariant"); + return _bitmap.mark(addr); + } + + inline void set_buf(HeapWord* buf) { + if (use_local_bitmaps && _during_marking) + _bitmap.set_buffer(buf); + ParGCAllocBuffer::set_buf(buf); + _retired = false; + } + + inline void retire(bool end_of_gc, bool retain) { + if (_retired) + return; + if (use_local_bitmaps && _during_marking) { + _bitmap.retire(); + } + ParGCAllocBuffer::retire(end_of_gc, retain); + _retired = true; + } +}; + + +class G1ParScanThreadState : public StackObj { +protected: + G1CollectedHeap* _g1h; + RefToScanQueue* _refs; + + typedef GrowableArray OverflowQueue; + OverflowQueue* _overflowed_refs; + + G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount]; + + size_t _alloc_buffer_waste; + size_t _undo_waste; + + OopsInHeapRegionClosure* _evac_failure_cl; + G1ParScanHeapEvacClosure* _evac_cl; + G1ParScanPartialArrayClosure* _partial_scan_cl; + + int _hash_seed; + int _queue_num; + + int _term_attempts; +#if G1_DETAILED_STATS + int _pushes, _pops, _steals, _steal_attempts; + int _overflow_pushes; +#endif + + double _start; + double _start_strong_roots; + double _strong_roots_time; + double _start_term; + double _term_time; + + // Map from young-age-index (0 == not young, 1 is youngest) to + // surviving words. base is what we get back from the malloc call + size_t* _surviving_young_words_base; + // this points into the array, as we use the first few entries for padding + size_t* _surviving_young_words; + +#define PADDING_ELEM_NUM (64 / sizeof(size_t)) + + void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; } + + void add_to_undo_waste(size_t waste) { _undo_waste += waste; } + +public: + G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) + : _g1h(g1h), + _refs(g1h->task_queue(queue_num)), + _hash_seed(17), _queue_num(queue_num), + _term_attempts(0), +#if G1_DETAILED_STATS + _pushes(0), _pops(0), _steals(0), + _steal_attempts(0), _overflow_pushes(0), +#endif + _strong_roots_time(0), _term_time(0), + _alloc_buffer_waste(0), _undo_waste(0) + { + // we allocate G1YoungSurvRateNumRegions plus one entries, since + // we "sacrifice" entry 0 to keep track of surviving bytes for + // non-young regions (where the age is -1) + // We also add a few elements at the beginning and at the end in + // an attempt to eliminate cache contention + size_t real_length = 1 + _g1h->g1_policy()->young_cset_length(); + size_t array_length = PADDING_ELEM_NUM + + real_length + + PADDING_ELEM_NUM; + _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length); + if (_surviving_young_words_base == NULL) + vm_exit_out_of_memory(array_length * sizeof(size_t), + "Not enough space for young surv histo."); + _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM; + memset(_surviving_young_words, 0, real_length * sizeof(size_t)); + + _overflowed_refs = new OverflowQueue(10); + + _start = os::elapsedTime(); + } + + ~G1ParScanThreadState() { + FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base); + } + + RefToScanQueue* refs() { return _refs; } + OverflowQueue* 
overflowed_refs() { return _overflowed_refs; } + + inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { + return &_alloc_buffers[purpose]; + } + + size_t alloc_buffer_waste() { return _alloc_buffer_waste; } + size_t undo_waste() { return _undo_waste; } + + void push_on_queue(oop* ref) { + if (!refs()->push(ref)) { + overflowed_refs()->push(ref); + IF_G1_DETAILED_STATS(note_overflow_push()); + } else { + IF_G1_DETAILED_STATS(note_push()); + } + } + + void pop_from_queue(oop*& ref) { + if (!refs()->pop_local(ref)) { + ref = NULL; + } else { + IF_G1_DETAILED_STATS(note_pop()); + } + } + + void pop_from_overflow_queue(oop*& ref) { + ref = overflowed_refs()->pop(); + } + + int refs_to_scan() { return refs()->size(); } + int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + + HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) { + + HeapWord* obj = NULL; + if (word_sz * 100 < + (size_t)(ParallelGCG1AllocBufferSize / HeapWordSize) * + ParallelGCBufferWastePct) { + G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose); + add_to_alloc_buffer_waste(alloc_buf->words_remaining()); + alloc_buf->retire(false, false); + + HeapWord* buf = + _g1h->par_allocate_during_gc(purpose, ParallelGCG1AllocBufferSize / HeapWordSize); + if (buf == NULL) return NULL; // Let caller handle allocation failure. + // Otherwise. + alloc_buf->set_buf(buf); + + obj = alloc_buf->allocate(word_sz); + assert(obj != NULL, "buffer was definitely big enough..."); + } + else { + obj = _g1h->par_allocate_during_gc(purpose, word_sz); + } + return obj; + } + + HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) { + HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz); + if (obj != NULL) return obj; + return allocate_slow(purpose, word_sz); + } + + void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) { + if (alloc_buffer(purpose)->contains(obj)) { + guarantee(alloc_buffer(purpose)->contains(obj + word_sz - 1), + "should contain whole object"); + alloc_buffer(purpose)->undo_allocation(obj, word_sz); + } + else { + SharedHeap::fill_region_with_object(MemRegion(obj, word_sz)); + add_to_undo_waste(word_sz); + } + } + + void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) { + _evac_failure_cl = evac_failure_cl; + } + OopsInHeapRegionClosure* evac_failure_closure() { + return _evac_failure_cl; + } + + void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) { + _evac_cl = evac_cl; + } + + void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) { + _partial_scan_cl = partial_scan_cl; + } + + int* hash_seed() { return &_hash_seed; } + int queue_num() { return _queue_num; } + + int term_attempts() { return _term_attempts; } + void note_term_attempt() { _term_attempts++; } + +#if G1_DETAILED_STATS + int pushes() { return _pushes; } + int pops() { return _pops; } + int steals() { return _steals; } + int steal_attempts() { return _steal_attempts; } + int overflow_pushes() { return _overflow_pushes; } + + void note_push() { _pushes++; } + void note_pop() { _pops++; } + void note_steal() { _steals++; } + void note_steal_attempt() { _steal_attempts++; } + void note_overflow_push() { _overflow_pushes++; } +#endif + + void start_strong_roots() { + _start_strong_roots = os::elapsedTime(); + } + void end_strong_roots() { + _strong_roots_time += (os::elapsedTime() - _start_strong_roots); + } + double strong_roots_time() { return _strong_roots_time; } + + void start_term_time() { + note_term_attempt(); + _start_term = os::elapsedTime(); + } + 
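+  // end_term_time() accumulates the elapsed time since the matching
+  // start_term_time() call into _term_time.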
void end_term_time() { + _term_time += (os::elapsedTime() - _start_term); + } + double term_time() { return _term_time; } + + double elapsed() { + return os::elapsedTime() - _start; + } + + size_t* surviving_young_words() { + // We add on to hide entry 0 which accumulates surviving words for + // age -1 regions (i.e. non-young ones) + return _surviving_young_words; + } + + void retire_alloc_buffers() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + size_t waste = _alloc_buffers[ap].words_remaining(); + add_to_alloc_buffer_waste(waste); + _alloc_buffers[ap].retire(true, false); + } + } + + void trim_queue() { + while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) { + oop *ref_to_scan = NULL; + if (overflowed_refs_to_scan() == 0) { + pop_from_queue(ref_to_scan); + } else { + pop_from_overflow_queue(ref_to_scan); + } + if (ref_to_scan != NULL) { + if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) { + _partial_scan_cl->do_oop_nv(ref_to_scan); + } else { + // Note: we can use "raw" versions of "region_containing" because + // "obj_to_scan" is definitely in the heap, and is not in a + // humongous region. + HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan); + _evac_cl->set_region(r); + _evac_cl->do_oop_nv(ref_to_scan); + } + } + } + } +}; + + +G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), + _par_scan_state(par_scan_state) { } + +// This closure is applied to the fields of the objects that have just been copied. +// Should probably be made inline and moved in g1OopClosures.inline.hpp. +void G1ParScanClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL) { + if (_g1->obj_in_cs(obj)) { + if (obj->is_forwarded()) { + *p = obj->forwardee(); + } else { + _par_scan_state->push_on_queue(p); + return; + } + } + _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + } +} + +void G1ParCopyHelper::mark_forwardee(oop* p) { + // This is called _after_ do_oop_work has been called, hence after + // the object has been relocated to its new location and *p points + // to its new location. + + oop thisOop = *p; + if (thisOop != NULL) { + assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(thisOop)), + "shouldn't still be in the CSet if evacuation didn't fail."); + HeapWord* addr = (HeapWord*)thisOop; + if (_g1->is_in_g1_reserved(addr)) + _cm->grayRoot(oop(addr)); + } +} + +oop G1ParCopyHelper::copy_to_survivor_space(oop old) { + size_t word_sz = old->size(); + HeapRegion* from_region = _g1->heap_region_containing_raw(old); + // +1 to make the -1 indexes valid... + int young_index = from_region->young_index_in_cset()+1; + assert( (from_region->is_young() && young_index > 0) || + (!from_region->is_young() && young_index == 0), "invariant" ); + G1CollectorPolicy* g1p = _g1->g1_policy(); + markOop m = old->mark(); + GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(), + word_sz); + HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz); + oop obj = oop(obj_ptr); + + if (obj_ptr == NULL) { + // This will either forward-to-self, or detect that someone else has + // installed a forwarding pointer. 
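+    // Allocation for the copy failed: hand the object to the parallel
+    // evacuation-failure path.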
+    OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
+    return _g1->handle_evacuation_failure_par(cl, old);
+  }
+
+  oop forward_ptr = old->forward_to_atomic(obj);
+  if (forward_ptr == NULL) {
+    Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
+    obj->set_mark(m);
+    if (g1p->track_object_age(alloc_purpose)) {
+      obj->incr_age();
+    }
+    // preserve "next" mark bit
+    if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
+      if (!use_local_bitmaps ||
+          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
+        // if we couldn't mark it on the local bitmap (this happens when
+        // the object was not allocated in the GCLab), we have to bite
+        // the bullet and do the standard parallel mark
+        _cm->markAndGrayObjectIfNecessary(obj);
+      }
+#if 1
+      if (_g1->isMarkedNext(old)) {
+        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
+      }
+#endif
+    }
+
+    size_t* surv_young_words = _par_scan_state->surviving_young_words();
+    surv_young_words[young_index] += word_sz;
+
+    if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
+      arrayOop(old)->set_length(0);
+      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+    } else {
+      _scanner->set_region(_g1->heap_region_containing(obj));
+      obj->oop_iterate_backwards(_scanner);
+    }
+  } else {
+    _par_scan_state->undo_allocation(alloc_purpose, obj_ptr, word_sz);
+    obj = forward_ptr;
+  }
+  return obj;
+}
+
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+  oop obj = *p;
+  assert(barrier != G1BarrierRS || obj != NULL,
+         "Precondition: G1BarrierRS implies obj is nonNull");
+
+  if (obj != NULL) {
+    if (_g1->obj_in_cs(obj)) {
+#if G1_REM_SET_LOGGING
+      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
+                             p, (void*) obj);
+#endif
+      if (obj->is_forwarded()) {
+        *p = obj->forwardee();
+      } else {
+        *p = copy_to_survivor_space(obj);
+      }
+      // When scanning the RS, we only care about objs in CS.
+      if (barrier == G1BarrierRS) {
+        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+      }
+    }
+    // When scanning moved objs, must look at all oops.
+    if (barrier == G1BarrierEvac) {
+      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+    }
+
+    if (do_gen_barrier) {
+      par_do_barrier(p);
+    }
+  }
+}
+
+template void G1ParCopyClosure::do_oop_work(oop* p);
+
+template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+  oop obj, int start, int end) {
+  // process our set of indices (include header in first chunk)
+  assert(start < end, "invariant");
+  T* const base = (T*)objArrayOop(obj)->base();
+  T* const start_addr = base + start;
+  T* const end_addr = base + end;
+  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
+  _scanner.set_region(_g1->heap_region_containing(obj));
+  obj->oop_iterate(&_scanner, mr);
+}
+
+void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
+  assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
+  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(old->is_objArray(), "must be obj array");
+  assert(old->is_forwarded(), "must be forwarded");
+  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
+
+  objArrayOop obj = objArrayOop(old->forwardee());
+  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
+  // Process ParGCArrayScanChunk elements now
+  // and push the remainder back onto queue
+  int start = arrayOop(old)->length();
+  int end = obj->length();
+  int remainder = end - start;
+  assert(start <= end, "just checking");
+  if (remainder > 2 * ParGCArrayScanChunk) {
+    // Test above combines last partial chunk with a full chunk
+    end = start + ParGCArrayScanChunk;
+    arrayOop(old)->set_length(end);
+    // Push remainder.
+    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+  } else {
+    // Restore length so that the heap remains parsable in
+    // case of evacuation failure.
+    arrayOop(old)->set_length(end);
+  }
+
+  // process our set of indices (include header in first chunk)
+  process_array_chunk<oop>(obj, start, end);
+  oop* start_addr = start == 0 ?
(oop*)obj : obj->obj_at_addr(start); + oop* end_addr = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length + MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr); + _scanner.set_region(_g1->heap_region_containing(obj)); + obj->oop_iterate(&_scanner, mr); +} + +int G1ScanAndBalanceClosure::_nq = 0; + +class G1ParEvacuateFollowersClosure : public VoidClosure { +protected: + G1CollectedHeap* _g1h; + G1ParScanThreadState* _par_scan_state; + RefToScanQueueSet* _queues; + ParallelTaskTerminator* _terminator; + + G1ParScanThreadState* par_scan_state() { return _par_scan_state; } + RefToScanQueueSet* queues() { return _queues; } + ParallelTaskTerminator* terminator() { return _terminator; } + +public: + G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h, + G1ParScanThreadState* par_scan_state, + RefToScanQueueSet* queues, + ParallelTaskTerminator* terminator) + : _g1h(g1h), _par_scan_state(par_scan_state), + _queues(queues), _terminator(terminator) {} + + void do_void() { + G1ParScanThreadState* pss = par_scan_state(); + while (true) { + oop* ref_to_scan; + pss->trim_queue(); + IF_G1_DETAILED_STATS(pss->note_steal_attempt()); + if (queues()->steal(pss->queue_num(), + pss->hash_seed(), + ref_to_scan)) { + IF_G1_DETAILED_STATS(pss->note_steal()); + pss->push_on_queue(ref_to_scan); + continue; + } + pss->start_term_time(); + if (terminator()->offer_termination()) break; + pss->end_term_time(); + } + pss->end_term_time(); + pss->retire_alloc_buffers(); + } +}; + +class G1ParTask : public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + RefToScanQueueSet *_queues; + ParallelTaskTerminator _terminator; + + Mutex _stats_lock; + Mutex* stats_lock() { return &_stats_lock; } + + size_t getNCards() { + return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1) + / G1BlockOffsetSharedArray::N_bytes; + } + +public: + G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues) + : AbstractGangTask("G1 collection"), + _g1h(g1h), + _queues(task_queues), + _terminator(workers, _queues), + _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) + {} + + RefToScanQueueSet* queues() { return _queues; } + + RefToScanQueue *work_queue(int i) { + return queues()->queue(i); + } + + void work(int i) { + ResourceMark rm; + HandleMark hm; + + G1ParScanThreadState pss(_g1h, i); + G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss); + G1ParScanHeapEvacClosure evac_failure_cl(_g1h, &pss); + G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss); + + pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); + pss.set_partial_scan_closure(&partial_scan_cl); + + G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); + G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); + G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); + G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); + G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); + G1ParScanAndMarkHeapRSClosure scan_mark_heap_rs_cl(_g1h, &pss); + + OopsInHeapRegionClosure *scan_root_cl; + OopsInHeapRegionClosure *scan_perm_cl; + OopsInHeapRegionClosure *scan_so_cl; + + if (_g1h->g1_policy()->should_initiate_conc_mark()) { + scan_root_cl = &scan_mark_root_cl; + scan_perm_cl = &scan_mark_perm_cl; + scan_so_cl = &scan_mark_heap_rs_cl; + } else { + scan_root_cl = &only_scan_root_cl; + scan_perm_cl = &only_scan_perm_cl; + scan_so_cl = &only_scan_heap_rs_cl; + } + + pss.start_strong_roots(); + _g1h->g1_process_strong_roots(/* not collecting perm */ false, + SharedHeap::SO_AllClasses, 
+ scan_root_cl, + &only_scan_heap_rs_cl, + scan_so_cl, + scan_perm_cl, + i); + pss.end_strong_roots(); + { + double start = os::elapsedTime(); + G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); + evac.do_void(); + double elapsed_ms = (os::elapsedTime()-start)*1000.0; + double term_ms = pss.term_time()*1000.0; + _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms); + _g1h->g1_policy()->record_termination_time(i, term_ms); + } + _g1h->update_surviving_young_words(pss.surviving_young_words()+1); + + // Clean up any par-expanded rem sets. + HeapRegionRemSet::par_cleanup(); + + MutexLocker x(stats_lock()); + if (ParallelGCVerbose) { + gclog_or_tty->print("Thread %d complete:\n", i); +#if G1_DETAILED_STATS + gclog_or_tty->print(" Pushes: %7d Pops: %7d Overflows: %7d Steals %7d (in %d attempts)\n", + pss.pushes(), + pss.pops(), + pss.overflow_pushes(), + pss.steals(), + pss.steal_attempts()); +#endif + double elapsed = pss.elapsed(); + double strong_roots = pss.strong_roots_time(); + double term = pss.term_time(); + gclog_or_tty->print(" Elapsed: %7.2f ms.\n" + " Strong roots: %7.2f ms (%6.2f%%)\n" + " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", + elapsed * 1000.0, + strong_roots * 1000.0, (strong_roots*100.0/elapsed), + term * 1000.0, (term*100.0/elapsed), + pss.term_attempts()); + size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste(); + gclog_or_tty->print(" Waste: %8dK\n" + " Alloc Buffer: %8dK\n" + " Undo: %8dK\n", + (total_waste * HeapWordSize) / K, + (pss.alloc_buffer_waste() * HeapWordSize) / K, + (pss.undo_waste() * HeapWordSize) / K); + } + + assert(pss.refs_to_scan() == 0, "Task queue should be empty"); + assert(pss.overflowed_refs_to_scan() == 0, "Overflow queue should be empty"); + } +}; + +// *** Common G1 Evacuation Stuff + +class G1CountClosure: public OopsInHeapRegionClosure { +public: + int n; + G1CountClosure() : n(0) {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; + assert(obj != NULL && G1CollectedHeap::heap()->obj_in_cs(obj), + "Rem set closure called on non-rem-set pointer."); + n++; + } +}; + +static G1CountClosure count_closure; + +void +G1CollectedHeap:: +g1_process_strong_roots(bool collecting_perm_gen, + SharedHeap::ScanningOption so, + OopClosure* scan_non_heap_roots, + OopsInHeapRegionClosure* scan_rs, + OopsInHeapRegionClosure* scan_so, + OopsInGenClosure* scan_perm, + int worker_i) { + // First scan the strong roots, including the perm gen. + double ext_roots_start = os::elapsedTime(); + double closure_app_time_sec = 0.0; + + BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots); + BufferingOopsInGenClosure buf_scan_perm(scan_perm); + buf_scan_perm.set_generation(perm_gen()); + + process_strong_roots(collecting_perm_gen, so, + &buf_scan_non_heap_roots, + &buf_scan_perm); + // Finish up any enqueued closure apps. + buf_scan_non_heap_roots.done(); + buf_scan_perm.done(); + double ext_roots_end = os::elapsedTime(); + g1_policy()->reset_obj_copy_time(worker_i); + double obj_copy_time_sec = + buf_scan_non_heap_roots.closure_app_seconds() + + buf_scan_perm.closure_app_seconds(); + g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0); + double ext_root_time_ms = + ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0; + g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms); + + // Scan strong roots in mark stack. 
+ if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) { + concurrent_mark()->oops_do(scan_non_heap_roots); + } + double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0; + g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms); + + // XXX What should this be doing in the parallel case? + g1_policy()->record_collection_pause_end_CH_strong_roots(); + if (G1VerifyRemSet) { + // :::: FIXME :::: + // The stupid remembered set doesn't know how to filter out dead + // objects, which the smart one does, and so when it is created + // and then compared the number of entries in each differs and + // the verification code fails. + guarantee(false, "verification code is broken, see note"); + + // Let's make sure that the current rem set agrees with the stupidest + // one possible! + bool refs_enabled = ref_processor()->discovery_enabled(); + if (refs_enabled) ref_processor()->disable_discovery(); + StupidG1RemSet stupid(this); + count_closure.n = 0; + stupid.oops_into_collection_set_do(&count_closure, worker_i); + int stupid_n = count_closure.n; + count_closure.n = 0; + g1_rem_set()->oops_into_collection_set_do(&count_closure, worker_i); + guarantee(count_closure.n == stupid_n, "Old and new rem sets differ."); + gclog_or_tty->print_cr("\nFound %d pointers in heap RS.", count_closure.n); + if (refs_enabled) ref_processor()->enable_discovery(); + } + if (scan_so != NULL) { + scan_scan_only_set(scan_so, worker_i); + } + // Now scan the complement of the collection set. + if (scan_rs != NULL) { + g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i); + } + // Finish with the ref_processor roots. + if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) { + ref_processor()->oops_do(scan_non_heap_roots); + } + g1_policy()->record_collection_pause_end_G1_strong_roots(); + _process_strong_tasks->all_tasks_completed(); +} + +void +G1CollectedHeap::scan_scan_only_region(HeapRegion* r, + OopsInHeapRegionClosure* oc, + int worker_i) { + HeapWord* startAddr = r->bottom(); + HeapWord* endAddr = r->used_region().end(); + + oc->set_region(r); + + HeapWord* p = r->bottom(); + HeapWord* t = r->top(); + guarantee( p == r->next_top_at_mark_start(), "invariant" ); + while (p < t) { + oop obj = oop(p); + p += obj->oop_iterate(oc); + } +} + +void +G1CollectedHeap::scan_scan_only_set(OopsInHeapRegionClosure* oc, + int worker_i) { + double start = os::elapsedTime(); + + BufferingOopsInHeapRegionClosure boc(oc); + + FilterInHeapRegionAndIntoCSClosure scan_only(this, &boc); + FilterAndMarkInHeapRegionAndIntoCSClosure scan_and_mark(this, &boc, concurrent_mark()); + + OopsInHeapRegionClosure *foc; + if (g1_policy()->should_initiate_conc_mark()) + foc = &scan_and_mark; + else + foc = &scan_only; + + HeapRegion* hr; + int n = 0; + while ((hr = _young_list->par_get_next_scan_only_region()) != NULL) { + scan_scan_only_region(hr, foc, worker_i); + ++n; + } + boc.done(); + + double closure_app_s = boc.closure_app_seconds(); + g1_policy()->record_obj_copy_time(worker_i, closure_app_s * 1000.0); + double ms = (os::elapsedTime() - start - closure_app_s)*1000.0; + g1_policy()->record_scan_only_time(worker_i, ms, n); +} + +void +G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure, + OopClosure* non_root_closure) { + SharedHeap::process_weak_roots(root_closure, non_root_closure); +} + + +class SaveMarksClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + r->save_marks(); + return false; + } +}; + +void 
G1CollectedHeap::save_marks() { + if (ParallelGCThreads == 0) { + SaveMarksClosure sm; + heap_region_iterate(&sm); + } + // We do this even in the parallel case + perm_gen()->save_marks(); +} + +void G1CollectedHeap::evacuate_collection_set() { + set_evacuation_failed(false); + + g1_rem_set()->prepare_for_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(false); + int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); + + set_par_threads(n_workers); + G1ParTask g1_par_task(this, n_workers, _task_queues); + + init_for_evac_failure(NULL); + + change_strong_roots_parity(); // In preparation for parallel strong roots. + rem_set()->prepare_for_younger_refs_iterate(true); + double start_par = os::elapsedTime(); + + if (ParallelGCThreads > 0) { + // The individual threads will set their evac-failure closures. + workers()->run_task(&g1_par_task); + } else { + g1_par_task.work(0); + } + + double par_time = (os::elapsedTime() - start_par) * 1000.0; + g1_policy()->record_par_time(par_time); + set_par_threads(0); + // Is this the right thing to do here? We don't save marks + // on individual heap regions when we allocate from + // them in parallel, so this seems like the correct place for this. + all_alloc_regions_note_end_of_copying(); + { + G1IsAliveClosure is_alive(this); + G1KeepAliveClosure keep_alive(this); + JNIHandles::weak_oops_do(&is_alive, &keep_alive); + } + + g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(true); + + finalize_for_evac_failure(); + + // Must do this before removing self-forwarding pointers, which clears + // the per-region evac-failure flags. + concurrent_mark()->complete_marking_in_collection_set(); + + if (evacuation_failed()) { + remove_self_forwarding_pointers(); + + if (PrintGCDetails) { + gclog_or_tty->print(" (evacuation failed)"); + } else if (PrintGC) { + gclog_or_tty->print("--"); + } + } + + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); +} + +void G1CollectedHeap::free_region(HeapRegion* hr) { + size_t pre_used = 0; + size_t cleared_h_regions = 0; + size_t freed_regions = 0; + UncleanRegionList local_list; + + HeapWord* start = hr->bottom(); + HeapWord* end = hr->prev_top_at_mark_start(); + size_t used_bytes = hr->used(); + size_t live_bytes = hr->max_live_bytes(); + if (used_bytes > 0) { + guarantee( live_bytes <= used_bytes, "invariant" ); + } else { + guarantee( live_bytes == 0, "invariant" ); + } + + size_t garbage_bytes = used_bytes - live_bytes; + if (garbage_bytes > 0) + g1_policy()->decrease_known_garbage_bytes(garbage_bytes); + + free_region_work(hr, pre_used, cleared_h_regions, freed_regions, + &local_list); + finish_free_region_work(pre_used, cleared_h_regions, freed_regions, + &local_list); +} + +void +G1CollectedHeap::free_region_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h_regions, + size_t& freed_regions, + UncleanRegionList* list, + bool par) { + assert(!hr->popular(), "should not free popular regions"); + pre_used += hr->used(); + if (hr->isHumongous()) { + assert(hr->startsHumongous(), + "Only the start of a humongous region should be freed."); + int ind = _hrs->find(hr); + assert(ind != -1, "Should have an index."); + // Clear the start region. + hr->hr_clear(par, true /*clear_space*/); + list->insert_before_head(hr); + cleared_h_regions++; + freed_regions++; + // Clear any continued regions. 
+ ind++; + while ((size_t)ind < n_regions()) { + HeapRegion* hrc = _hrs->at(ind); + if (!hrc->continuesHumongous()) break; + // Otherwise, does continue the H region. + assert(hrc->humongous_start_region() == hr, "Huh?"); + hrc->hr_clear(par, true /*clear_space*/); + cleared_h_regions++; + freed_regions++; + list->insert_before_head(hrc); + ind++; + } + } else { + hr->hr_clear(par, true /*clear_space*/); + list->insert_before_head(hr); + freed_regions++; + // If we're using clear2, this should not be enabled. + // assert(!hr->in_cohort(), "Can't be both free and in a cohort."); + } +} + +void G1CollectedHeap::finish_free_region_work(size_t pre_used, + size_t cleared_h_regions, + size_t freed_regions, + UncleanRegionList* list) { + if (list != NULL && list->sz() > 0) { + prepend_region_list_on_unclean_list(list); + } + // Acquire a lock, if we're parallel, to update possibly-shared + // variables. + Mutex* lock = (n_par_threads() > 0) ? ParGCRareEvent_lock : NULL; + { + MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag); + _summary_bytes_used -= pre_used; + _num_humongous_regions -= (int) cleared_h_regions; + _free_regions += freed_regions; + } +} + + +void G1CollectedHeap::dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list) { + while (list != NULL) { + guarantee( list->is_young(), "invariant" ); + + HeapWord* bottom = list->bottom(); + HeapWord* end = list->end(); + MemRegion mr(bottom, end); + ct_bs->dirty(mr); + + list = list->get_next_young_region(); + } +} + +void G1CollectedHeap::cleanUpCardTable() { + CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set()); + double start = os::elapsedTime(); + + ct_bs->clear(_g1_committed); + + // now, redirty the cards of the scan-only and survivor regions + // (it seemed faster to do it this way, instead of iterating over + // all regions and then clearing / dirtying as approprite) + dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region()); + dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region()); + + double elapsed = os::elapsedTime() - start; + g1_policy()->record_clear_ct_time( elapsed * 1000.0); +} + + +void G1CollectedHeap::do_collection_pause_if_appropriate(size_t word_size) { + // First do any popular regions. + HeapRegion* hr; + while ((hr = popular_region_to_evac()) != NULL) { + evac_popular_region(hr); + } + // Now do heuristic pauses. 
+ if (g1_policy()->should_do_collection_pause(word_size)) { + do_collection_pause(); + } +} + +void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) { + double young_time_ms = 0.0; + double non_young_time_ms = 0.0; + + G1CollectorPolicy* policy = g1_policy(); + + double start_sec = os::elapsedTime(); + bool non_young = true; + + HeapRegion* cur = cs_head; + int age_bound = -1; + size_t rs_lengths = 0; + + while (cur != NULL) { + if (non_young) { + if (cur->is_young()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + non_young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = false; + } + } else { + if (!cur->is_on_free_list()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = true; + } + } + + rs_lengths += cur->rem_set()->occupied(); + + HeapRegion* next = cur->next_in_collection_set(); + assert(cur->in_collection_set(), "bad CS"); + cur->set_next_in_collection_set(NULL); + cur->set_in_collection_set(false); + + if (cur->is_young()) { + int index = cur->young_index_in_cset(); + guarantee( index != -1, "invariant" ); + guarantee( (size_t)index < policy->young_cset_length(), "invariant" ); + size_t words_survived = _surviving_young_words[index]; + cur->record_surv_words_in_group(words_survived); + } else { + int index = cur->young_index_in_cset(); + guarantee( index == -1, "invariant" ); + } + + assert( (cur->is_young() && cur->young_index_in_cset() > -1) || + (!cur->is_young() && cur->young_index_in_cset() == -1), + "invariant" ); + + if (!cur->evacuation_failed()) { + // And the region is empty. + assert(!cur->is_empty(), + "Should not have empty regions in a CS."); + free_region(cur); + } else { + guarantee( !cur->is_scan_only(), "should not be scan only" ); + cur->uninstall_surv_rate_group(); + if (cur->is_young()) + cur->set_young_index_in_cset(-1); + cur->set_not_young(); + cur->set_evacuation_failed(false); + } + cur = next; + } + + policy->record_max_rs_lengths(rs_lengths); + policy->cset_regions_freed(); + + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + if (non_young) + non_young_time_ms += elapsed_ms; + else + young_time_ms += elapsed_ms; + + policy->record_young_free_cset_time_ms(young_time_ms); + policy->record_non_young_free_cset_time_ms(non_young_time_ms); +} + +HeapRegion* +G1CollectedHeap::alloc_region_from_unclean_list_locked(bool zero_filled) { + assert(ZF_mon->owned_by_self(), "Precondition"); + HeapRegion* res = pop_unclean_region_list_locked(); + if (res != NULL) { + assert(!res->continuesHumongous() && + res->zero_fill_state() != HeapRegion::Allocated, + "Only free regions on unclean list."); + if (zero_filled) { + res->ensure_zero_filled_locked(); + res->set_zero_fill_allocated(); + } + } + return res; +} + +HeapRegion* G1CollectedHeap::alloc_region_from_unclean_list(bool zero_filled) { + MutexLockerEx zx(ZF_mon, Mutex::_no_safepoint_check_flag); + return alloc_region_from_unclean_list_locked(zero_filled); +} + +void G1CollectedHeap::put_region_on_unclean_list(HeapRegion* r) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + put_region_on_unclean_list_locked(r); + if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread. 
+} + +void G1CollectedHeap::set_unclean_regions_coming(bool b) { + MutexLockerEx x(Cleanup_mon); + set_unclean_regions_coming_locked(b); +} + +void G1CollectedHeap::set_unclean_regions_coming_locked(bool b) { + assert(Cleanup_mon->owned_by_self(), "Precondition"); + _unclean_regions_coming = b; + // Wake up mutator threads that might be waiting for completeCleanup to + // finish. + if (!b) Cleanup_mon->notify_all(); +} + +void G1CollectedHeap::wait_for_cleanup_complete() { + MutexLockerEx x(Cleanup_mon); + wait_for_cleanup_complete_locked(); +} + +void G1CollectedHeap::wait_for_cleanup_complete_locked() { + assert(Cleanup_mon->owned_by_self(), "precondition"); + while (_unclean_regions_coming) { + Cleanup_mon->wait(); + } +} + +void +G1CollectedHeap::put_region_on_unclean_list_locked(HeapRegion* r) { + assert(ZF_mon->owned_by_self(), "precondition."); + _unclean_region_list.insert_before_head(r); +} + +void +G1CollectedHeap::prepend_region_list_on_unclean_list(UncleanRegionList* list) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + prepend_region_list_on_unclean_list_locked(list); + if (should_zf()) ZF_mon->notify_all(); // Wake up ZF thread. +} + +void +G1CollectedHeap:: +prepend_region_list_on_unclean_list_locked(UncleanRegionList* list) { + assert(ZF_mon->owned_by_self(), "precondition."); + _unclean_region_list.prepend_list(list); +} + +HeapRegion* G1CollectedHeap::pop_unclean_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + HeapRegion* res = _unclean_region_list.pop(); + if (res != NULL) { + // Inform ZF thread that there's a new unclean head. + if (_unclean_region_list.hd() != NULL && should_zf()) + ZF_mon->notify_all(); + } + return res; +} + +HeapRegion* G1CollectedHeap::peek_unclean_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + return _unclean_region_list.hd(); +} + + +bool G1CollectedHeap::move_cleaned_region_to_free_list_locked() { + assert(ZF_mon->owned_by_self(), "Precondition"); + HeapRegion* r = peek_unclean_region_list_locked(); + if (r != NULL && r->zero_fill_state() == HeapRegion::ZeroFilled) { + // Result of below must be equal to "r", since we hold the lock. 
+ (void)pop_unclean_region_list_locked(); + put_free_region_on_list_locked(r); + return true; + } else { + return false; + } +} + +bool G1CollectedHeap::move_cleaned_region_to_free_list() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + return move_cleaned_region_to_free_list_locked(); +} + + +void G1CollectedHeap::put_free_region_on_list_locked(HeapRegion* r) { + assert(ZF_mon->owned_by_self(), "precondition."); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + assert(r->zero_fill_state() == HeapRegion::ZeroFilled, + "Regions on free list must be zero filled"); + assert(!r->isHumongous(), "Must not be humongous."); + assert(r->is_empty(), "Better be empty"); + assert(!r->is_on_free_list(), + "Better not already be on free list"); + assert(!r->is_on_unclean_list(), + "Better not already be on unclean list"); + r->set_on_free_list(true); + r->set_next_on_free_list(_free_region_list); + _free_region_list = r; + _free_region_list_size++; + assert(_free_region_list_size == free_region_list_length(), "Inv"); +} + +void G1CollectedHeap::put_free_region_on_list(HeapRegion* r) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + put_free_region_on_list_locked(r); +} + +HeapRegion* G1CollectedHeap::pop_free_region_list_locked() { + assert(ZF_mon->owned_by_self(), "precondition."); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + HeapRegion* res = _free_region_list; + if (res != NULL) { + _free_region_list = res->next_from_free_list(); + _free_region_list_size--; + res->set_on_free_list(false); + res->set_next_on_free_list(NULL); + assert(_free_region_list_size == free_region_list_length(), "Inv"); + } + return res; +} + + +HeapRegion* G1CollectedHeap::alloc_free_region_from_lists(bool zero_filled) { + // By self, or on behalf of self. + assert(Heap_lock->is_locked(), "Precondition"); + HeapRegion* res = NULL; + bool first = true; + while (res == NULL) { + if (zero_filled || !first) { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + res = pop_free_region_list_locked(); + if (res != NULL) { + assert(!res->zero_fill_is_allocated(), + "No allocated regions on free list."); + res->set_zero_fill_allocated(); + } else if (!first) { + break; // We tried both, time to return NULL. + } + } + + if (res == NULL) { + res = alloc_region_from_unclean_list(zero_filled); + } + assert(res == NULL || + !zero_filled || + res->zero_fill_is_allocated(), + "We must have allocated the region we're returning"); + first = false; + } + return res; +} + +void G1CollectedHeap::remove_allocated_regions_from_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + { + HeapRegion* prev = NULL; + HeapRegion* cur = _unclean_region_list.hd(); + while (cur != NULL) { + HeapRegion* next = cur->next_from_unclean_list(); + if (cur->zero_fill_is_allocated()) { + // Remove from the list. + if (prev == NULL) { + (void)_unclean_region_list.pop(); + } else { + _unclean_region_list.delete_after(prev); + } + cur->set_on_unclean_list(false); + cur->set_next_on_unclean_list(NULL); + } else { + prev = cur; + } + cur = next; + } + assert(_unclean_region_list.sz() == unclean_region_list_length(), + "Inv"); + } + + { + HeapRegion* prev = NULL; + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + HeapRegion* next = cur->next_from_free_list(); + if (cur->zero_fill_is_allocated()) { + // Remove from the list. 
+ if (prev == NULL) { + _free_region_list = cur->next_from_free_list(); + } else { + prev->set_next_on_free_list(cur->next_from_free_list()); + } + cur->set_on_free_list(false); + cur->set_next_on_free_list(NULL); + _free_region_list_size--; + } else { + prev = cur; + } + cur = next; + } + assert(_free_region_list_size == free_region_list_length(), "Inv"); + } +} + +bool G1CollectedHeap::verify_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + return verify_region_lists_locked(); +} + +bool G1CollectedHeap::verify_region_lists_locked() { + HeapRegion* unclean = _unclean_region_list.hd(); + while (unclean != NULL) { + guarantee(unclean->is_on_unclean_list(), "Well, it is!"); + guarantee(!unclean->is_on_free_list(), "Well, it shouldn't be!"); + guarantee(unclean->zero_fill_state() != HeapRegion::Allocated, + "Everything else is possible."); + unclean = unclean->next_from_unclean_list(); + } + guarantee(_unclean_region_list.sz() == unclean_region_list_length(), "Inv"); + + HeapRegion* free_r = _free_region_list; + while (free_r != NULL) { + assert(free_r->is_on_free_list(), "Well, it is!"); + assert(!free_r->is_on_unclean_list(), "Well, it shouldn't be!"); + switch (free_r->zero_fill_state()) { + case HeapRegion::NotZeroFilled: + case HeapRegion::ZeroFilling: + guarantee(false, "Should not be on free list."); + break; + default: + // Everything else is possible. + break; + } + free_r = free_r->next_from_free_list(); + } + guarantee(_free_region_list_size == free_region_list_length(), "Inv"); + // If we didn't do an assertion... + return true; +} + +size_t G1CollectedHeap::free_region_list_length() { + assert(ZF_mon->owned_by_self(), "precondition."); + size_t len = 0; + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + len++; + cur = cur->next_from_free_list(); + } + return len; +} + +size_t G1CollectedHeap::unclean_region_list_length() { + assert(ZF_mon->owned_by_self(), "precondition."); + return _unclean_region_list.length(); +} + +size_t G1CollectedHeap::n_regions() { + return _hrs->length(); +} + +size_t G1CollectedHeap::max_regions() { + return + (size_t)align_size_up(g1_reserved_obj_bytes(), HeapRegion::GrainBytes) / + HeapRegion::GrainBytes; +} + +size_t G1CollectedHeap::free_regions() { + /* Possibly-expensive assert. 
+ assert(_free_regions == count_free_regions(), + "_free_regions is off."); + */ + return _free_regions; +} + +bool G1CollectedHeap::should_zf() { + return _free_region_list_size < (size_t) G1ConcZFMaxRegions; +} + +class RegionCounter: public HeapRegionClosure { + size_t _n; +public: + RegionCounter() : _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->is_empty() && !r->popular()) { + assert(!r->isHumongous(), "H regions should not be empty."); + _n++; + } + return false; + } + int res() { return (int) _n; } +}; + +size_t G1CollectedHeap::count_free_regions() { + RegionCounter rc; + heap_region_iterate(&rc); + size_t n = rc.res(); + if (_cur_alloc_region != NULL && _cur_alloc_region->is_empty()) + n--; + return n; +} + +size_t G1CollectedHeap::count_free_regions_list() { + size_t n = 0; + size_t o = 0; + ZF_mon->lock_without_safepoint_check(); + HeapRegion* cur = _free_region_list; + while (cur != NULL) { + cur = cur->next_from_free_list(); + n++; + } + size_t m = unclean_region_list_length(); + ZF_mon->unlock(); + return n + m; +} + +bool G1CollectedHeap::should_set_young_locked() { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + return (g1_policy()->in_young_gc_mode() && + g1_policy()->should_add_next_region_to_young_list()); +} + +void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + _young_list->push_region(hr); + g1_policy()->set_region_short_lived(hr); +} + +class NoYoungRegionsClosure: public HeapRegionClosure { +private: + bool _success; +public: + NoYoungRegionsClosure() : _success(true) { } + bool doHeapRegion(HeapRegion* r) { + if (r->is_young()) { + gclog_or_tty->print_cr("Region ["PTR_FORMAT", "PTR_FORMAT") tagged as young", + r->bottom(), r->end()); + _success = false; + } + return false; + } + bool success() { return _success; } +}; + +bool G1CollectedHeap::check_young_list_empty(bool ignore_scan_only_list, + bool check_sample) { + bool ret = true; + + ret = _young_list->check_list_empty(ignore_scan_only_list, check_sample); + if (!ignore_scan_only_list) { + NoYoungRegionsClosure closure; + heap_region_iterate(&closure); + ret = ret && closure.success(); + } + + return ret; +} + +void G1CollectedHeap::empty_young_list() { + assert(heap_lock_held_for_gc(), + "the heap lock should already be held by or for this thread"); + assert(g1_policy()->in_young_gc_mode(), "should be in young GC mode"); + + _young_list->empty_list(); +} + +bool G1CollectedHeap::all_alloc_regions_no_allocs_since_save_marks() { + bool no_allocs = true; + for (int ap = 0; ap < GCAllocPurposeCount && no_allocs; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + no_allocs = r == NULL || r->saved_mark_at_top(); + } + return no_allocs; +} + +void G1CollectedHeap::all_alloc_regions_note_end_of_copying() { + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { + HeapRegion* r = _gc_alloc_regions[ap]; + if (r != NULL) { + // Check for aliases. + bool has_processed_alias = false; + for (int i = 0; i < ap; ++i) { + if (_gc_alloc_regions[i] == r) { + has_processed_alias = true; + break; + } + } + if (!has_processed_alias) { + r->note_end_of_copying(); + g1_policy()->record_after_bytes(r->used()); + } + } + } +} + + +// Done at the start of full GC. 
+void G1CollectedHeap::tear_down_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + while (pop_unclean_region_list_locked() != NULL) ; + assert(_unclean_region_list.hd() == NULL && _unclean_region_list.sz() == 0, + "Postconditions of loop.") + while (pop_free_region_list_locked() != NULL) ; + assert(_free_region_list == NULL, "Postcondition of loop."); + if (_free_region_list_size != 0) { + gclog_or_tty->print_cr("Size is %d.", _free_region_list_size); + print(); + } + assert(_free_region_list_size == 0, "Postconditions of loop."); +} + + +class RegionResetter: public HeapRegionClosure { + G1CollectedHeap* _g1; + int _n; +public: + RegionResetter() : _g1(G1CollectedHeap::heap()), _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->top() > r->bottom()) { + if (r->top() < r->end()) { + Copy::fill_to_words(r->top(), + pointer_delta(r->end(), r->top())); + } + r->set_zero_fill_allocated(); + } else { + assert(r->is_empty(), "tautology"); + if (r->popular()) { + if (r->zero_fill_state() != HeapRegion::Allocated) { + r->ensure_zero_filled_locked(); + r->set_zero_fill_allocated(); + } + } else { + _n++; + switch (r->zero_fill_state()) { + case HeapRegion::NotZeroFilled: + case HeapRegion::ZeroFilling: + _g1->put_region_on_unclean_list_locked(r); + break; + case HeapRegion::Allocated: + r->set_zero_fill_complete(); + // no break; go on to put on free list. + case HeapRegion::ZeroFilled: + _g1->put_free_region_on_list_locked(r); + break; + } + } + } + return false; + } + + int getFreeRegionCount() {return _n;} +}; + +// Done at the end of full GC. +void G1CollectedHeap::rebuild_region_lists() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + // This needs to go at the end of the full GC. + RegionResetter rs; + heap_region_iterate(&rs); + _free_regions = rs.getFreeRegionCount(); + // Tell the ZF thread it may have work to do. + if (should_zf()) ZF_mon->notify_all(); +} + +class UsedRegionsNeedZeroFillSetter: public HeapRegionClosure { + G1CollectedHeap* _g1; + int _n; +public: + UsedRegionsNeedZeroFillSetter() : _g1(G1CollectedHeap::heap()), _n(0) {} + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + if (r->top() > r->bottom()) { + // There are assertions in "set_zero_fill_needed()" below that + // require top() == bottom(), so this is technically illegal. + // We'll skirt the law here, by making that true temporarily. + DEBUG_ONLY(HeapWord* save_top = r->top(); + r->set_top(r->bottom())); + r->set_zero_fill_needed(); + DEBUG_ONLY(r->set_top(save_top)); + } + return false; + } +}; + +// Done at the start of full GC. +void G1CollectedHeap::set_used_regions_to_need_zero_fill() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + // This needs to go at the end of the full GC. 
+ UsedRegionsNeedZeroFillSetter rs; + heap_region_iterate(&rs); +} + +class CountObjClosure: public ObjectClosure { + size_t _n; +public: + CountObjClosure() : _n(0) {} + void do_object(oop obj) { _n++; } + size_t n() { return _n; } +}; + +size_t G1CollectedHeap::pop_object_used_objs() { + size_t sum_objs = 0; + for (int i = 0; i < G1NumPopularRegions; i++) { + CountObjClosure cl; + _hrs->at(i)->object_iterate(&cl); + sum_objs += cl.n(); + } + return sum_objs; +} + +size_t G1CollectedHeap::pop_object_used_bytes() { + size_t sum_bytes = 0; + for (int i = 0; i < G1NumPopularRegions; i++) { + sum_bytes += _hrs->at(i)->used(); + } + return sum_bytes; +} + + +static int nq = 0; + +HeapWord* G1CollectedHeap::allocate_popular_object(size_t word_size) { + while (_cur_pop_hr_index < G1NumPopularRegions) { + HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); + HeapWord* res = cur_pop_region->allocate(word_size); + if (res != NULL) { + // We account for popular objs directly in the used summary: + _summary_bytes_used += (word_size * HeapWordSize); + return res; + } + // Otherwise, try the next region (first making sure that we remember + // the last "top" value as the "next_top_at_mark_start", so that + // objects made popular during markings aren't automatically considered + // live). + cur_pop_region->note_end_of_copying(); + // Otherwise, try the next region. + _cur_pop_hr_index++; + } + // XXX: For now !!! + vm_exit_out_of_memory(word_size, + "Not enough pop obj space (To Be Fixed)"); + return NULL; +} + +class HeapRegionList: public CHeapObj { + public: + HeapRegion* hr; + HeapRegionList* next; +}; + +void G1CollectedHeap::schedule_popular_region_evac(HeapRegion* r) { + // This might happen during parallel GC, so protect by this lock. + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + // We don't schedule regions whose evacuations are already pending, or + // are already being evacuated. + if (!r->popular_pending() && !r->in_collection_set()) { + r->set_popular_pending(true); + if (G1TracePopularity) { + gclog_or_tty->print_cr("Scheduling region "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT") for pop-object evacuation.", + r, r->bottom(), r->end()); + } + HeapRegionList* hrl = new HeapRegionList; + hrl->hr = r; + hrl->next = _popular_regions_to_be_evacuated; + _popular_regions_to_be_evacuated = hrl; + } +} + +HeapRegion* G1CollectedHeap::popular_region_to_evac() { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + HeapRegion* res = NULL; + while (_popular_regions_to_be_evacuated != NULL && res == NULL) { + HeapRegionList* hrl = _popular_regions_to_be_evacuated; + _popular_regions_to_be_evacuated = hrl->next; + res = hrl->hr; + // The G1RSPopLimit may have increased, so recheck here... + if (res->rem_set()->occupied() < (size_t) G1RSPopLimit) { + // Hah: don't need to schedule. + if (G1TracePopularity) { + gclog_or_tty->print_cr("Unscheduling region "PTR_FORMAT" " + "["PTR_FORMAT", "PTR_FORMAT") " + "for pop-object evacuation (size %d < limit %d)", + res, res->bottom(), res->end(), + res->rem_set()->occupied(), G1RSPopLimit); + } + res->set_popular_pending(false); + res = NULL; + } + // We do not reset res->popular() here; if we did so, it would allow + // the region to be "rescheduled" for popularity evacuation. Instead, + // this is done in the collection pause, with the world stopped. 
+ // So the invariant is that the regions in the list have the popularity + // boolean set, but having the boolean set does not imply membership + // on the list (though there can at most one such pop-pending region + // not on the list at any time). + delete hrl; + } + return res; +} + +void G1CollectedHeap::evac_popular_region(HeapRegion* hr) { + while (true) { + // Don't want to do a GC pause while cleanup is being completed! + wait_for_cleanup_complete(); + + // Read the GC count while holding the Heap_lock + int gc_count_before = SharedHeap::heap()->total_collections(); + g1_policy()->record_stop_world_start(); + + { + MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back + VM_G1PopRegionCollectionPause op(gc_count_before, hr); + VMThread::execute(&op); + + // If the prolog succeeded, we didn't do a GC for this. + if (op.prologue_succeeded()) break; + } + // Otherwise we didn't. We should recheck the size, though, since + // the limit may have increased... + if (hr->rem_set()->occupied() < (size_t) G1RSPopLimit) { + hr->set_popular_pending(false); + break; + } + } +} + +void G1CollectedHeap::atomic_inc_obj_rc(oop obj) { + Atomic::inc(obj_rc_addr(obj)); +} + +class CountRCClosure: public OopsInHeapRegionClosure { + G1CollectedHeap* _g1h; + bool _parallel; +public: + CountRCClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _parallel(ParallelGCThreads > 0) + {} + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + oop obj = *p; + assert(obj != NULL, "Precondition."); + if (_parallel) { + // We go sticky at the limit to avoid excess contention. + // If we want to track the actual RC's further, we'll need to keep a + // per-thread hash table or something for the popular objects. + if (_g1h->obj_rc(obj) < G1ObjPopLimit) { + _g1h->atomic_inc_obj_rc(obj); + } + } else { + _g1h->inc_obj_rc(obj); + } + } +}; + +class EvacPopObjClosure: public ObjectClosure { + G1CollectedHeap* _g1h; + size_t _pop_objs; + size_t _max_rc; +public: + EvacPopObjClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _pop_objs(0), _max_rc(0) {} + + void do_object(oop obj) { + size_t rc = _g1h->obj_rc(obj); + _max_rc = MAX2(rc, _max_rc); + if (rc >= (size_t) G1ObjPopLimit) { + _g1h->_pop_obj_rc_at_copy.add((double)rc); + size_t word_sz = obj->size(); + HeapWord* new_pop_loc = _g1h->allocate_popular_object(word_sz); + oop new_pop_obj = (oop)new_pop_loc; + Copy::aligned_disjoint_words((HeapWord*)obj, new_pop_loc, word_sz); + obj->forward_to(new_pop_obj); + G1ScanAndBalanceClosure scan_and_balance(_g1h); + new_pop_obj->oop_iterate_backwards(&scan_and_balance); + // preserve "next" mark bit if marking is in progress. 
+ if (_g1h->mark_in_progress() && !_g1h->is_obj_ill(obj)) { + _g1h->concurrent_mark()->markAndGrayObjectIfNecessary(new_pop_obj); + } + + if (G1TracePopularity) { + gclog_or_tty->print_cr("Found obj " PTR_FORMAT " of word size " SIZE_FORMAT + " pop (%d), move to " PTR_FORMAT, + (void*) obj, word_sz, + _g1h->obj_rc(obj), (void*) new_pop_obj); + } + _pop_objs++; + } + } + size_t pop_objs() { return _pop_objs; } + size_t max_rc() { return _max_rc; } +}; + +class G1ParCountRCTask : public AbstractGangTask { + G1CollectedHeap* _g1h; + BitMap _bm; + + size_t getNCards() { + return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1) + / G1BlockOffsetSharedArray::N_bytes; + } + CountRCClosure _count_rc_closure; +public: + G1ParCountRCTask(G1CollectedHeap* g1h) : + AbstractGangTask("G1 Par RC Count task"), + _g1h(g1h), _bm(getNCards()), _count_rc_closure(g1h) + {} + + void work(int i) { + ResourceMark rm; + HandleMark hm; + _g1h->g1_rem_set()->oops_into_collection_set_do(&_count_rc_closure, i); + } +}; + +void G1CollectedHeap::popularity_pause_preamble(HeapRegion* popular_region) { + // We're evacuating a single region (for popularity). + if (G1TracePopularity) { + gclog_or_tty->print_cr("Doing pop region pause for ["PTR_FORMAT", "PTR_FORMAT")", + popular_region->bottom(), popular_region->end()); + } + g1_policy()->set_single_region_collection_set(popular_region); + size_t max_rc; + if (!compute_reference_counts_and_evac_popular(popular_region, + &max_rc)) { + // We didn't evacuate any popular objects. + // We increase the RS popularity limit, to prevent this from + // happening in the future. + if (G1RSPopLimit < (1 << 30)) { + G1RSPopLimit *= 2; + } + // For now, interesting enough for a message: +#if 1 + gclog_or_tty->print_cr("In pop region pause for ["PTR_FORMAT", "PTR_FORMAT"), " + "failed to find a pop object (max = %d).", + popular_region->bottom(), popular_region->end(), + max_rc); + gclog_or_tty->print_cr("Increased G1RSPopLimit to %d.", G1RSPopLimit); +#endif // 0 + // Also, we reset the collection set to NULL, to make the rest of + // the collection do nothing. + assert(popular_region->next_in_collection_set() == NULL, + "should be single-region."); + popular_region->set_in_collection_set(false); + popular_region->set_popular_pending(false); + g1_policy()->clear_collection_set(); + } +} + +bool G1CollectedHeap:: +compute_reference_counts_and_evac_popular(HeapRegion* popular_region, + size_t* max_rc) { + HeapWord* rc_region_bot; + HeapWord* rc_region_end; + + // Set up the reference count region. + HeapRegion* rc_region = newAllocRegion(HeapRegion::GrainWords); + if (rc_region != NULL) { + rc_region_bot = rc_region->bottom(); + rc_region_end = rc_region->end(); + } else { + rc_region_bot = NEW_C_HEAP_ARRAY(HeapWord, HeapRegion::GrainWords); + if (rc_region_bot == NULL) { + vm_exit_out_of_memory(HeapRegion::GrainWords, + "No space for RC region."); + } + rc_region_end = rc_region_bot + HeapRegion::GrainWords; + } + + if (G1TracePopularity) + gclog_or_tty->print_cr("RC region is ["PTR_FORMAT", "PTR_FORMAT")", + rc_region_bot, rc_region_end); + if (rc_region_bot > popular_region->bottom()) { + _rc_region_above = true; + _rc_region_diff = + pointer_delta(rc_region_bot, popular_region->bottom(), 1); + } else { + assert(rc_region_bot < popular_region->bottom(), "Can't be equal."); + _rc_region_above = false; + _rc_region_diff = + pointer_delta(popular_region->bottom(), rc_region_bot, 1); + } + g1_policy()->record_pop_compute_rc_start(); + // Count external references. 
+ g1_rem_set()->prepare_for_oops_into_collection_set_do(); + if (ParallelGCThreads > 0) { + + set_par_threads(workers()->total_workers()); + G1ParCountRCTask par_count_rc_task(this); + workers()->run_task(&par_count_rc_task); + set_par_threads(0); + + } else { + CountRCClosure count_rc_closure(this); + g1_rem_set()->oops_into_collection_set_do(&count_rc_closure, 0); + } + g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + g1_policy()->record_pop_compute_rc_end(); + + // Now evacuate popular objects. + g1_policy()->record_pop_evac_start(); + EvacPopObjClosure evac_pop_obj_cl(this); + popular_region->object_iterate(&evac_pop_obj_cl); + *max_rc = evac_pop_obj_cl.max_rc(); + + // Make sure the last "top" value of the current popular region is copied + // as the "next_top_at_mark_start", so that objects made popular during + // markings aren't automatically considered live. + HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); + cur_pop_region->note_end_of_copying(); + + if (rc_region != NULL) { + free_region(rc_region); + } else { + FREE_C_HEAP_ARRAY(HeapWord, rc_region_bot); + } + g1_policy()->record_pop_evac_end(); + + return evac_pop_obj_cl.pop_objs() > 0; +} + +class CountPopObjInfoClosure: public HeapRegionClosure { + size_t _objs; + size_t _bytes; + + class CountObjClosure: public ObjectClosure { + int _n; + public: + CountObjClosure() : _n(0) {} + void do_object(oop obj) { _n++; } + size_t n() { return _n; } + }; + +public: + CountPopObjInfoClosure() : _objs(0), _bytes(0) {} + bool doHeapRegion(HeapRegion* r) { + _bytes += r->used(); + CountObjClosure blk; + r->object_iterate(&blk); + _objs += blk.n(); + return false; + } + size_t objs() { return _objs; } + size_t bytes() { return _bytes; } +}; + + +void G1CollectedHeap::print_popularity_summary_info() const { + CountPopObjInfoClosure blk; + for (int i = 0; i <= _cur_pop_hr_index; i++) { + blk.doHeapRegion(_hrs->at(i)); + } + gclog_or_tty->print_cr("\nPopular objects: %d objs, %d bytes.", + blk.objs(), blk.bytes()); + gclog_or_tty->print_cr(" RC at copy = [avg = %5.2f, max = %5.2f, sd = %5.2f].", + _pop_obj_rc_at_copy.avg(), + _pop_obj_rc_at_copy.maximum(), + _pop_obj_rc_at_copy.sd()); +} + +void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) { + _refine_cte_cl->set_concurrent(concurrent); +} + +#ifndef PRODUCT + +class PrintHeapRegionClosure: public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion *r) { + gclog_or_tty->print("Region: "PTR_FORMAT":", r); + if (r != NULL) { + if (r->is_on_free_list()) + gclog_or_tty->print("Free "); + if (r->is_young()) + gclog_or_tty->print("Young "); + if (r->isHumongous()) + gclog_or_tty->print("Is Humongous "); + r->print(); + } + return false; + } +}; + +class SortHeapRegionClosure : public HeapRegionClosure { + size_t young_regions,free_regions, unclean_regions; + size_t hum_regions, count; + size_t unaccounted, cur_unclean, cur_alloc; + size_t total_free; + HeapRegion* cur; +public: + SortHeapRegionClosure(HeapRegion *_cur) : cur(_cur), young_regions(0), + free_regions(0), unclean_regions(0), + hum_regions(0), + count(0), unaccounted(0), + cur_alloc(0), total_free(0) + {} + bool doHeapRegion(HeapRegion *r) { + count++; + if (r->is_on_free_list()) free_regions++; + else if (r->is_on_unclean_list()) unclean_regions++; + else if (r->isHumongous()) hum_regions++; + else if (r->is_young()) young_regions++; + else if (r == cur) cur_alloc++; + else unaccounted++; + return false; + } + void print() { + total_free = free_regions + unclean_regions; + 
gclog_or_tty->print("%d regions\n", count); + gclog_or_tty->print("%d free: free_list = %d unclean = %d\n", + total_free, free_regions, unclean_regions); + gclog_or_tty->print("%d humongous %d young\n", + hum_regions, young_regions); + gclog_or_tty->print("%d cur_alloc\n", cur_alloc); + gclog_or_tty->print("UHOH unaccounted = %d\n", unaccounted); + } +}; + +void G1CollectedHeap::print_region_counts() { + SortHeapRegionClosure sc(_cur_alloc_region); + PrintHeapRegionClosure cl; + heap_region_iterate(&cl); + heap_region_iterate(&sc); + sc.print(); + print_region_accounting_info(); +}; + +bool G1CollectedHeap::regions_accounted_for() { + // TODO: regions accounting for young/survivor/tenured + return true; +} + +bool G1CollectedHeap::print_region_accounting_info() { + gclog_or_tty->print_cr("P regions: %d.", G1NumPopularRegions); + gclog_or_tty->print_cr("Free regions: %d (count: %d count list %d) (clean: %d unclean: %d).", + free_regions(), + count_free_regions(), count_free_regions_list(), + _free_region_list_size, _unclean_region_list.sz()); + gclog_or_tty->print_cr("cur_alloc: %d.", + (_cur_alloc_region == NULL ? 0 : 1)); + gclog_or_tty->print_cr("H regions: %d.", _num_humongous_regions); + + // TODO: check regions accounting for young/survivor/tenured + return true; +} + +bool G1CollectedHeap::is_in_closed_subset(const void* p) const { + HeapRegion* hr = heap_region_containing(p); + if (hr == NULL) { + return is_in_permanent(p); + } else { + return hr->is_in(p); + } +} +#endif // PRODUCT + +void G1CollectedHeap::g1_unimplemented() { + // Unimplemented(); +} + + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1191 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A "G1CollectedHeap" is an implementation of a java heap for HotSpot. +// It uses the "Garbage First" heap organization and algorithm, which +// may combine concurrent marking with parallel, incremental compaction of +// heap subsets that will yield large amounts of garbage. 
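+// The heap is divided into uniformly-sized HeapRegions. Young, scan-only,
+// and survivor regions are tracked by the YoungList declared below, while
+// free and unclean regions are kept on lists protected by the ZF_mon lock.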
+ +class HeapRegion; +class HeapRegionSeq; +class HeapRegionList; +class PermanentGenerationSpec; +class GenerationSpec; +class OopsInHeapRegionClosure; +class G1ScanHeapEvacClosure; +class ObjectClosure; +class SpaceClosure; +class CompactibleSpaceClosure; +class Space; +class G1CollectorPolicy; +class GenRemSet; +class G1RemSet; +class HeapRegionRemSetIterator; +class ConcurrentMark; +class ConcurrentMarkThread; +class ConcurrentG1Refine; +class ConcurrentZFThread; + +// If want to accumulate detailed statistics on work queues +// turn this on. +#define G1_DETAILED_STATS 0 + +#if G1_DETAILED_STATS +# define IF_G1_DETAILED_STATS(code) code +#else +# define IF_G1_DETAILED_STATS(code) +#endif + +typedef GenericTaskQueue RefToScanQueue; +typedef GenericTaskQueueSet RefToScanQueueSet; + +enum G1GCThreadGroups { + G1CRGroup = 0, + G1ZFGroup = 1, + G1CMGroup = 2, + G1CLGroup = 3 +}; + +enum GCAllocPurpose { + GCAllocForTenured, + GCAllocForSurvived, + GCAllocPurposeCount +}; + +class YoungList : public CHeapObj { +private: + G1CollectedHeap* _g1h; + + HeapRegion* _head; + + HeapRegion* _scan_only_head; + HeapRegion* _scan_only_tail; + size_t _length; + size_t _scan_only_length; + + size_t _last_sampled_rs_lengths; + size_t _sampled_rs_lengths; + HeapRegion* _curr; + HeapRegion* _curr_scan_only; + + HeapRegion* _survivor_head; + HeapRegion* _survivors_tail; + size_t _survivor_length; + + void empty_list(HeapRegion* list); + +public: + YoungList(G1CollectedHeap* g1h); + + void push_region(HeapRegion* hr); + void add_survivor_region(HeapRegion* hr); + HeapRegion* pop_region(); + void empty_list(); + bool is_empty() { return _length == 0; } + size_t length() { return _length; } + size_t scan_only_length() { return _scan_only_length; } + + void rs_length_sampling_init(); + bool rs_length_sampling_more(); + void rs_length_sampling_next(); + + void reset_sampled_info() { + _last_sampled_rs_lengths = 0; + } + size_t sampled_rs_lengths() { return _last_sampled_rs_lengths; } + + // for development purposes + void reset_auxilary_lists(); + HeapRegion* first_region() { return _head; } + HeapRegion* first_scan_only_region() { return _scan_only_head; } + HeapRegion* first_survivor_region() { return _survivor_head; } + HeapRegion* par_get_next_scan_only_region() { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + HeapRegion* ret = _curr_scan_only; + if (ret != NULL) + _curr_scan_only = ret->get_next_young_region(); + return ret; + } + + // debugging + bool check_list_well_formed(); + bool check_list_empty(bool ignore_scan_only_list, + bool check_sample = true); + void print(); +}; + +class RefineCardTableEntryClosure; +class G1CollectedHeap : public SharedHeap { + friend class VM_G1CollectForAllocation; + friend class VM_GenCollectForPermanentAllocation; + friend class VM_G1CollectFull; + friend class VM_G1IncCollectionPause; + friend class VM_G1PopRegionCollectionPause; + friend class VMStructs; + + // Closures used in implementation. + friend class G1ParCopyHelper; + friend class G1IsAliveClosure; + friend class G1EvacuateFollowersClosure; + friend class G1ParScanThreadState; + friend class G1ParScanClosureSuper; + friend class G1ParEvacuateFollowersClosure; + friend class G1ParTask; + friend class G1FreeGarbageRegionClosure; + friend class RefineCardTableEntryClosure; + friend class G1PrepareCompactClosure; + friend class RegionSorter; + friend class CountRCClosure; + friend class EvacPopObjClosure; + + // Other related classes. 
+ friend class G1MarkSweep; + +private: + enum SomePrivateConstants { + VeryLargeInBytes = HeapRegion::GrainBytes/2, + VeryLargeInWords = VeryLargeInBytes/HeapWordSize, + MinHeapDeltaBytes = 10 * HeapRegion::GrainBytes, // FIXME + NumAPIs = HeapRegion::MaxAge + }; + + + // The one and only G1CollectedHeap, so static functions can find it. + static G1CollectedHeap* _g1h; + + // Storage for the G1 heap (excludes the permanent generation). + VirtualSpace _g1_storage; + MemRegion _g1_reserved; + + // The part of _g1_storage that is currently committed. + MemRegion _g1_committed; + + // The maximum part of _g1_storage that has ever been committed. + MemRegion _g1_max_committed; + + // The number of regions that are completely free. + size_t _free_regions; + + // The number of regions we could create by expansion. + size_t _expansion_regions; + + // Return the number of free regions in the heap (by direct counting.) + size_t count_free_regions(); + // Return the number of free regions on the free and unclean lists. + size_t count_free_regions_list(); + + // The block offset table for the G1 heap. + G1BlockOffsetSharedArray* _bot_shared; + + // Move all of the regions off the free lists, then rebuild those free + // lists, before and after full GC. + void tear_down_region_lists(); + void rebuild_region_lists(); + // This sets all non-empty regions to need zero-fill (which they will if + // they are empty after full collection.) + void set_used_regions_to_need_zero_fill(); + + // The sequence of all heap regions in the heap. + HeapRegionSeq* _hrs; + + // The region from which normal-sized objects are currently being + // allocated. May be NULL. + HeapRegion* _cur_alloc_region; + + // Postcondition: cur_alloc_region == NULL. + void abandon_cur_alloc_region(); + + // The to-space memory regions into which objects are being copied during + // a GC. + HeapRegion* _gc_alloc_regions[GCAllocPurposeCount]; + uint _gc_alloc_region_counts[GCAllocPurposeCount]; + + // A list of the regions that have been set to be alloc regions in the + // current collection. + HeapRegion* _gc_alloc_region_list; + + // When called by par thread, require par_alloc_during_gc_lock() to be held. + void push_gc_alloc_region(HeapRegion* hr); + + // This should only be called single-threaded. Undeclares all GC alloc + // regions. + void forget_alloc_region_list(); + + // Should be used to set an alloc region, because there's other + // associated bookkeeping. + void set_gc_alloc_region(int purpose, HeapRegion* r); + + // Check well-formedness of alloc region list. + bool check_gc_alloc_regions(); + + // Outside of GC pauses, the number of bytes used in all regions other + // than the current allocation region. + size_t _summary_bytes_used; + + // Summary information about popular objects; method to print it. + NumberSeq _pop_obj_rc_at_copy; + void print_popularity_summary_info() const; + + unsigned _gc_time_stamp; + + size_t* _surviving_young_words; + + void setup_surviving_young_words(); + void update_surviving_young_words(size_t* surv_young_words); + void cleanup_surviving_young_words(); + +protected: + + // Returns "true" iff none of the gc alloc regions have any allocations + // since the last call to "save_marks". + bool all_alloc_regions_no_allocs_since_save_marks(); + // Calls "note_end_of_copying on all gc alloc_regions. + void all_alloc_regions_note_end_of_copying(); + + // The number of regions allocated to hold humongous objects. 
+ int _num_humongous_regions; + YoungList* _young_list; + + // The current policy object for the collector. + G1CollectorPolicy* _g1_policy; + + // Parallel allocation lock to protect the current allocation region. + Mutex _par_alloc_during_gc_lock; + Mutex* par_alloc_during_gc_lock() { return &_par_alloc_during_gc_lock; } + + // If possible/desirable, allocate a new HeapRegion for normal object + // allocation sufficient for an allocation of the given "word_size". + // If "do_expand" is true, will attempt to expand the heap if necessary + // to satisfy the request. If "zero_filled" is true, requires a + // zero-filled region. + // (Returning NULL will trigger a GC.) + virtual HeapRegion* newAllocRegion_work(size_t word_size, + bool do_expand, + bool zero_filled); + + virtual HeapRegion* newAllocRegion(size_t word_size, + bool zero_filled = true) { + return newAllocRegion_work(word_size, false, zero_filled); + } + virtual HeapRegion* newAllocRegionWithExpansion(int purpose, + size_t word_size, + bool zero_filled = true); + + // Attempt to allocate an object of the given (very large) "word_size". + // Returns "NULL" on failure. + virtual HeapWord* humongousObjAllocate(size_t word_size); + + // If possible, allocate a block of the given word_size, else return "NULL". + // Returning NULL will trigger GC or heap expansion. + // These two methods have rather awkward pre- and + // post-conditions. If they are called outside a safepoint, then + // they assume that the caller is holding the heap lock. Upon return + // they release the heap lock, if they are returning a non-NULL + // value. attempt_allocation_slow() also dirties the cards of a + // newly-allocated young region after it releases the heap + // lock. This change in interface was the neatest way to achieve + // this card dirtying without affecting mem_allocate(), which is a + // more frequently called method. We tried two or three different + // approaches, but they were even more hacky. + HeapWord* attempt_allocation(size_t word_size, + bool permit_collection_pause = true); + + HeapWord* attempt_allocation_slow(size_t word_size, + bool permit_collection_pause = true); + + // Allocate blocks during garbage collection. Will ensure an + // allocation region, either by picking one or expanding the + // heap, and then allocate a block of the given size. The block + // may not be humongous - it must fit into a single heap region. + HeapWord* allocate_during_gc(GCAllocPurpose purpose, size_t word_size); + HeapWord* par_allocate_during_gc(GCAllocPurpose purpose, size_t word_size); + + HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose, + HeapRegion* alloc_region, + bool par, + size_t word_size); + + // Ensure that no further allocations can happen in "r", bearing in mind + // that parallel threads might be attempting allocations. + void par_allocate_remaining_space(HeapRegion* r); + + // Helper function for two callbacks below. + // "full", if true, indicates that the GC is for a System.gc() request, + // and should collect the entire heap. If "clear_all_soft_refs" is true, + // all soft references are cleared during the GC. If "full" is false, + // "word_size" describes the allocation that the GC should + // attempt (at least) to satisfy. + void do_collection(bool full, bool clear_all_soft_refs, + size_t word_size); + + // Callback from VM_G1CollectFull operation. + // Perform a full collection. + void do_full_collection(bool clear_all_soft_refs); + + // Resize the heap if necessary after a full collection.
If this is + // after a collect-for allocation, "word_size" is the allocation size, + // and will be considered part of the used portion of the heap. + void resize_if_necessary_after_full_collection(size_t word_size); + + // Callback from VM_G1CollectForAllocation operation. + // This function does everything necessary/possible to satisfy a + // failed allocation request (including collection, expansion, etc.) + HeapWord* satisfy_failed_allocation(size_t word_size); + + // Attempting to expand the heap sufficiently + // to support an allocation of the given "word_size". If + // successful, perform the allocation and return the address of the + // allocated block, or else "NULL". + virtual HeapWord* expand_and_allocate(size_t word_size); + +public: + // Expand the garbage-first heap by at least the given size (in bytes!). + // (Rounds up to a HeapRegion boundary.) + virtual void expand(size_t expand_bytes); + + // Do anything common to GC's. + virtual void gc_prologue(bool full); + virtual void gc_epilogue(bool full); + +protected: + + // Shrink the garbage-first heap by at most the given size (in bytes!). + // (Rounds down to a HeapRegion boundary.) + virtual void shrink(size_t expand_bytes); + void shrink_helper(size_t expand_bytes); + + // Do an incremental collection: identify a collection set, and evacuate + // its live objects elsewhere. + virtual void do_collection_pause(); + + // The guts of the incremental collection pause, executed by the vm + // thread. If "popular_region" is non-NULL, this pause should evacuate + // this single region whose remembered set has gotten large, moving + // any popular objects to one of the popular regions. + virtual void do_collection_pause_at_safepoint(HeapRegion* popular_region); + + // Actually do the work of evacuating the collection set. + virtual void evacuate_collection_set(); + + // If this is an appropriate right time, do a collection pause. + // The "word_size" argument, if non-zero, indicates the size of an + // allocation request that is prompting this query. + void do_collection_pause_if_appropriate(size_t word_size); + + // The g1 remembered set of the heap. + G1RemSet* _g1_rem_set; + // And it's mod ref barrier set, used to track updates for the above. + ModRefBarrierSet* _mr_bs; + + // The Heap Region Rem Set Iterator. + HeapRegionRemSetIterator** _rem_set_iterator; + + // The closure used to refine a single card. + RefineCardTableEntryClosure* _refine_cte_cl; + + // A function to check the consistency of dirty card logs. + void check_ct_logs_at_safepoint(); + + // After a collection pause, make the regions in the CS into free + // regions. + void free_collection_set(HeapRegion* cs_head); + + // Applies "scan_non_heap_roots" to roots outside the heap, + // "scan_rs" to roots inside the heap (having done "set_region" to + // indicate the region in which the root resides), and does "scan_perm" + // (setting the generation to the perm generation.) If "scan_rs" is + // NULL, then this step is skipped. The "worker_i" + // param is for use with parallel roots processing, and should be + // the "i" of the calling parallel worker thread's work(i) function. + // In the sequential case this param will be ignored. 
+ void g1_process_strong_roots(bool collecting_perm_gen, + SharedHeap::ScanningOption so, + OopClosure* scan_non_heap_roots, + OopsInHeapRegionClosure* scan_rs, + OopsInHeapRegionClosure* scan_so, + OopsInGenClosure* scan_perm, + int worker_i); + + void scan_scan_only_set(OopsInHeapRegionClosure* oc, + int worker_i); + void scan_scan_only_region(HeapRegion* hr, + OopsInHeapRegionClosure* oc, + int worker_i); + + // Apply "blk" to all the weak roots of the system. These include + // JNI weak roots, the code cache, system dictionary, symbol table, + // string table, and referents of reachable weak refs. + void g1_process_weak_roots(OopClosure* root_closure, + OopClosure* non_root_closure); + + // Invoke "save_marks" on all heap regions. + void save_marks(); + + // Free a heap region. + void free_region(HeapRegion* hr); + // A component of "free_region", exposed for 'batching'. + // All the params after "hr" are out params: the used bytes of the freed + // region(s), the number of H regions cleared, the number of regions + // freed, and pointers to the head and tail of a list of freed contig + // regions, linked through the "next_on_unclean_list" field. + void free_region_work(HeapRegion* hr, + size_t& pre_used, + size_t& cleared_h, + size_t& freed_regions, + UncleanRegionList* list, + bool par = false); + + + // The concurrent marker (and the thread it runs in.) + ConcurrentMark* _cm; + ConcurrentMarkThread* _cmThread; + bool _mark_in_progress; + + // The concurrent refiner. + ConcurrentG1Refine* _cg1r; + + // The concurrent zero-fill thread. + ConcurrentZFThread* _czft; + + // The parallel task queues + RefToScanQueueSet *_task_queues; + + // True iff an evacuation has failed in the current collection. + bool _evacuation_failed; + + // Set the attribute indicating whether evacuation has failed in the + // current collection. + void set_evacuation_failed(bool b) { _evacuation_failed = b; } + + // Failed evacuations cause some logical from-space objects to have + // forwarding pointers to themselves. Reset them. + void remove_self_forwarding_pointers(); + + // When one is non-null, so is the other. Together, each pair is + // an object with a preserved mark, and its mark value. + GrowableArray<oop>* _objs_with_preserved_marks; + GrowableArray<markOop>* _preserved_marks_of_objs; + + // Preserve the mark of "obj", if necessary, in preparation for its mark + // word being overwritten with a self-forwarding-pointer. + void preserve_mark_if_necessary(oop obj, markOop m); + + // The stack of evac-failure objects left to be scanned. + GrowableArray<oop>* _evac_failure_scan_stack; + // The closure to apply to evac-failure objects. + + OopsInHeapRegionClosure* _evac_failure_closure; + // Set the field above. + void + set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_closure) { + _evac_failure_closure = evac_failure_closure; + } + + // Push "obj" on the scan stack. + void push_on_evac_failure_scan_stack(oop obj); + // Process scan stack entries until the stack is empty. + void drain_evac_failure_scan_stack(); + // True iff an invocation of "drain_scan_stack" is in progress; to + // prevent unnecessary recursion. + bool _drain_in_progress; + + // Do any necessary initialization for evacuation-failure handling. + // "cl" is the closure that will be used to process evac-failure + // objects. + void init_for_evac_failure(OopsInHeapRegionClosure* cl); + // Do any necessary cleanup for evacuation-failure handling data + // structures.
+ void finalize_for_evac_failure(); + + // An attempt to evacuate "obj" has failed; take necessary steps. + void handle_evacuation_failure(oop obj); + oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj); + void handle_evacuation_failure_common(oop obj, markOop m); + + + // Ensure that the relevant gc_alloc regions are set. + void get_gc_alloc_regions(); + // We're done with GC alloc regions; release them, as appropriate. + void release_gc_alloc_regions(); + + // ("Weak") Reference processing support + ReferenceProcessor* _ref_processor; + + enum G1H_process_strong_roots_tasks { + G1H_PS_mark_stack_oops_do, + G1H_PS_refProcessor_oops_do, + // Leave this one last. + G1H_PS_NumElements + }; + + SubTasksDone* _process_strong_tasks; + + // Allocate space to hold a popular object. Result is guaranteed below + // "popular_object_boundary()". Note: CURRENTLY halts the system if we + // run out of space to hold popular objects. + HeapWord* allocate_popular_object(size_t word_size); + + // The boundary between popular and non-popular objects. + HeapWord* _popular_object_boundary; + + HeapRegionList* _popular_regions_to_be_evacuated; + + // Compute which objects in "single_region" are popular. If any are, + // evacuate them to a popular region, leaving behind forwarding pointers, + // and select "popular_region" as the single collection set region. + // Otherwise, leave the collection set null. + void popularity_pause_preamble(HeapRegion* populer_region); + + // Compute which objects in "single_region" are popular, and evacuate + // them to a popular region, leaving behind forwarding pointers. + // Returns "true" if at least one popular object is discovered and + // evacuated. In any case, "*max_rc" is set to the maximum reference + // count of an object in the region. + bool compute_reference_counts_and_evac_popular(HeapRegion* populer_region, + size_t* max_rc); + // Subroutines used in the above. + bool _rc_region_above; + size_t _rc_region_diff; + jint* obj_rc_addr(oop obj) { + uintptr_t obj_addr = (uintptr_t)obj; + if (_rc_region_above) { + jint* res = (jint*)(obj_addr + _rc_region_diff); + assert((uintptr_t)res > obj_addr, "RC region is above."); + return res; + } else { + jint* res = (jint*)(obj_addr - _rc_region_diff); + assert((uintptr_t)res < obj_addr, "RC region is below."); + return res; + } + } + jint obj_rc(oop obj) { + return *obj_rc_addr(obj); + } + void inc_obj_rc(oop obj) { + (*obj_rc_addr(obj))++; + } + void atomic_inc_obj_rc(oop obj); + + + // Number of popular objects and bytes (latter is cheaper!). + size_t pop_object_used_objs(); + size_t pop_object_used_bytes(); + + // Index of the popular region in which allocation is currently being + // done. + int _cur_pop_hr_index; + + // List of regions which require zero filling. + UncleanRegionList _unclean_region_list; + bool _unclean_regions_coming; + + bool check_age_cohort_well_formed_work(int a, HeapRegion* hr); + +public: + void set_refine_cte_cl_concurrency(bool concurrent); + + RefToScanQueue *task_queue(int i); + + // Create a G1CollectedHeap with the specified policy. + // Must call the initialize method afterwards. + // May not return if something goes wrong. + G1CollectedHeap(G1CollectorPolicy* policy); + + // Initialize the G1CollectedHeap to have the initial and + // maximum sizes, permanent generation, and remembered and barrier sets + // specified by the policy object. 
+ jint initialize(); + + void ref_processing_init(); + + void set_par_threads(int t) { + SharedHeap::set_par_threads(t); + _process_strong_tasks->set_par_threads(t); + } + + virtual CollectedHeap::Name kind() const { + return CollectedHeap::G1CollectedHeap; + } + + // The current policy object for the collector. + G1CollectorPolicy* g1_policy() const { return _g1_policy; } + + // Adaptive size policy. No such thing for g1. + virtual AdaptiveSizePolicy* size_policy() { return NULL; } + + // The rem set and barrier set. + G1RemSet* g1_rem_set() const { return _g1_rem_set; } + ModRefBarrierSet* mr_bs() const { return _mr_bs; } + + // The rem set iterator. + HeapRegionRemSetIterator* rem_set_iterator(int i) { + return _rem_set_iterator[i]; + } + + HeapRegionRemSetIterator* rem_set_iterator() { + return _rem_set_iterator[0]; + } + + unsigned get_gc_time_stamp() { + return _gc_time_stamp; + } + + void reset_gc_time_stamp() { + _gc_time_stamp = 0; + } + + void iterate_dirty_card_closure(bool concurrent, int worker_i); + + // The shared block offset table array. + G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; } + + // Reference Processing accessor + ReferenceProcessor* ref_processor() { return _ref_processor; } + + // Reserved (g1 only; super method includes perm), capacity and the used + // portion in bytes. + size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); } + virtual size_t capacity() const; + virtual size_t used() const; + size_t recalculate_used() const; +#ifndef PRODUCT + size_t recalculate_used_regions() const; +#endif // PRODUCT + + // These virtual functions do the actual allocation. + virtual HeapWord* mem_allocate(size_t word_size, + bool is_noref, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded); + + // Some heaps may offer a contiguous region for shared non-blocking + // allocation, via inlined code (by exporting the address of the top and + // end fields defining the extent of the contiguous allocation region.) + // But G1CollectedHeap doesn't yet support this. + + // Return an estimate of the maximum allocation that could be performed + // without triggering any collection or expansion activity. In a + // generational collector, for example, this is probably the largest + // allocation that could be supported (without expansion) in the youngest + // generation. It is "unsafe" because no locks are taken; the result + // should be treated as an approximation, not a guarantee, for use in + // heuristic resizing decisions. + virtual size_t unsafe_max_alloc(); + + virtual bool is_maximal_no_gc() const { + return _g1_storage.uncommitted_size() == 0; + } + + // The total number of regions in the heap. + size_t n_regions(); + + // The number of regions that are completely free. + size_t max_regions(); + + // The number of regions that are completely free. + size_t free_regions(); + + // The number of regions that are not completely free. + size_t used_regions() { return n_regions() - free_regions(); } + + // True iff the ZF thread should run. + bool should_zf(); + + // The number of regions available for "regular" expansion. 
+ size_t expansion_regions() { return _expansion_regions; } + +#ifndef PRODUCT + bool regions_accounted_for(); + bool print_region_accounting_info(); + void print_region_counts(); +#endif + + HeapRegion* alloc_region_from_unclean_list(bool zero_filled); + HeapRegion* alloc_region_from_unclean_list_locked(bool zero_filled); + + void put_region_on_unclean_list(HeapRegion* r); + void put_region_on_unclean_list_locked(HeapRegion* r); + + void prepend_region_list_on_unclean_list(UncleanRegionList* list); + void prepend_region_list_on_unclean_list_locked(UncleanRegionList* list); + + void set_unclean_regions_coming(bool b); + void set_unclean_regions_coming_locked(bool b); + // Wait for cleanup to be complete. + void wait_for_cleanup_complete(); + // Like above, but assumes that the calling thread owns the Heap_lock. + void wait_for_cleanup_complete_locked(); + + // Return the head of the unclean list. + HeapRegion* peek_unclean_region_list_locked(); + // Remove and return the head of the unclean list. + HeapRegion* pop_unclean_region_list_locked(); + + // List of regions which are zero filled and ready for allocation. + HeapRegion* _free_region_list; + // Number of elements on the free list. + size_t _free_region_list_size; + + // If the head of the unclean list is ZeroFilled, move it to the free + // list. + bool move_cleaned_region_to_free_list_locked(); + bool move_cleaned_region_to_free_list(); + + void put_free_region_on_list_locked(HeapRegion* r); + void put_free_region_on_list(HeapRegion* r); + + // Remove and return the head element of the free list. + HeapRegion* pop_free_region_list_locked(); + + // If "zero_filled" is true, we first try the free list, then we try the + // unclean list, zero-filling the result. If "zero_filled" is false, we + // first try the unclean list, then the zero-filled list. + HeapRegion* alloc_free_region_from_lists(bool zero_filled); + + // Verify the integrity of the region lists. + void remove_allocated_regions_from_lists(); + bool verify_region_lists(); + bool verify_region_lists_locked(); + size_t unclean_region_list_length(); + size_t free_region_list_length(); + + // Perform a collection of the heap; intended for use in implementing + // "System.gc". This probably implies as full a collection as the + // "CollectedHeap" supports. + virtual void collect(GCCause::Cause cause); + + // The same as above but assume that the caller holds the Heap_lock. + void collect_locked(GCCause::Cause cause); + + // This interface assumes that it's being called by the + // vm thread. It collects the heap assuming that the + // heap lock is already held and that we are executing in + // the context of the vm thread. + virtual void collect_as_vm_thread(GCCause::Cause cause); + + // True iff a evacuation has failed in the most-recent collection. + bool evacuation_failed() { return _evacuation_failed; } + + // Free a region if it is totally full of garbage. Returns the number of + // bytes freed (0 ==> didn't free it). + size_t free_region_if_totally_empty(HeapRegion *hr); + void free_region_if_totally_empty_work(HeapRegion *hr, + size_t& pre_used, + size_t& cleared_h_regions, + size_t& freed_regions, + UncleanRegionList* list, + bool par = false); + + // If we've done free region work that yields the given changes, update + // the relevant global variables. + void finish_free_region_work(size_t pre_used, + size_t cleared_h_regions, + size_t freed_regions, + UncleanRegionList* list); + + + // Returns "TRUE" iff "p" points into the allocated area of the heap. 
+ virtual bool is_in(const void* p) const; + + // Return "TRUE" iff the given object address is within the collection + // set. + inline bool obj_in_cs(oop obj); + + // Return "TRUE" iff the given object address is in the reserved + // region of g1 (excluding the permanent generation). + bool is_in_g1_reserved(const void* p) const { + return _g1_reserved.contains(p); + } + + // Returns a MemRegion that corresponds to the space that has been + // committed in the heap + MemRegion g1_committed() { + return _g1_committed; + } + + NOT_PRODUCT( bool is_in_closed_subset(const void* p) const; ) + + // Dirty card table entries covering a list of young regions. + void dirtyCardsForYoungRegions(CardTableModRefBS* ct_bs, HeapRegion* list); + + // This resets the card table to all zeros. It is used after + // a collection pause which used the card table to claim cards. + void cleanUpCardTable(); + + // Iteration functions. + + // Iterate over all the ref-containing fields of all objects, calling + // "cl.do_oop" on each. + virtual void oop_iterate(OopClosure* cl); + + // Same as above, restricted to a memory region. + virtual void oop_iterate(MemRegion mr, OopClosure* cl); + + // Iterate over all objects, calling "cl.do_object" on each. + virtual void object_iterate(ObjectClosure* cl); + + // Iterate over all objects allocated since the last collection, calling + // "cl.do_object" on each. The heap must have been initialized properly + // to support this function, or else this call will fail. + virtual void object_iterate_since_last_GC(ObjectClosure* cl); + + // Iterate over all spaces in use in the heap, in ascending address order. + virtual void space_iterate(SpaceClosure* cl); + + // Iterate over heap regions, in address order, terminating the + // iteration early if the "doHeapRegion" method returns "true". + void heap_region_iterate(HeapRegionClosure* blk); + + // Iterate over heap regions starting with r (or the first region if "r" + // is NULL), in address order, terminating early if the "doHeapRegion" + // method returns "true". + void heap_region_iterate_from(HeapRegion* r, HeapRegionClosure* blk); + + // As above but starting from the region at index idx. + void heap_region_iterate_from(int idx, HeapRegionClosure* blk); + + HeapRegion* region_at(size_t idx); + + + // Divide the heap region sequence into "chunks" of some size (the number + // of regions divided by the number of parallel threads times some + // overpartition factor, currently 4). Assumes that this will be called + // in parallel by ParallelGCThreads worker threads with discinct worker + // ids in the range [0..max(ParallelGCThreads-1, 1)], that all parallel + // calls will use the same "claim_value", and that that claim value is + // different from the claim_value of any heap region before the start of + // the iteration. Applies "blk->doHeapRegion" to each of the regions, by + // attempting to claim the first region in each chunk, and, if + // successful, applying the closure to each region in the chunk (and + // setting the claim value of the second and subsequent regions of the + // chunk.) For now requires that "doHeapRegion" always returns "false", + // i.e., that a closure never attempt to abort a traversal. + void heap_region_par_iterate_chunked(HeapRegionClosure* blk, + int worker, + jint claim_value); + + // Iterate over the regions (if any) in the current collection set. 
+ void collection_set_iterate(HeapRegionClosure* blk); + + // As above but starting from region r + void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk); + + // Returns the first (lowest address) compactible space in the heap. + virtual CompactibleSpace* first_compactible_space(); + + // A CollectedHeap will contain some number of spaces. This finds the + // space containing a given address, or else returns NULL. + virtual Space* space_containing(const void* addr) const; + + // A G1CollectedHeap will contain some number of heap regions. This + // finds the region containing a given address, or else returns NULL. + HeapRegion* heap_region_containing(const void* addr) const; + + // Like the above, but requires "addr" to be in the heap (to avoid a + // null-check), and unlike the above, may return an continuing humongous + // region. + HeapRegion* heap_region_containing_raw(const void* addr) const; + + // A CollectedHeap is divided into a dense sequence of "blocks"; that is, + // each address in the (reserved) heap is a member of exactly + // one block. The defining characteristic of a block is that it is + // possible to find its size, and thus to progress forward to the next + // block. (Blocks may be of different sizes.) Thus, blocks may + // represent Java objects, or they might be free blocks in a + // free-list-based heap (or subheap), as long as the two kinds are + // distinguishable and the size of each is determinable. + + // Returns the address of the start of the "block" that contains the + // address "addr". We say "blocks" instead of "object" since some heaps + // may not pack objects densely; a chunk may either be an object or a + // non-object. + virtual HeapWord* block_start(const void* addr) const; + + // Requires "addr" to be the start of a chunk, and returns its size. + // "addr + size" is required to be the start of a new chunk, or the end + // of the active area of the heap. + virtual size_t block_size(const HeapWord* addr) const; + + // Requires "addr" to be the start of a block, and returns "TRUE" iff + // the block is an object. + virtual bool block_is_obj(const HeapWord* addr) const; + + // Does this heap support heap inspection? (+PrintClassHistogram) + virtual bool supports_heap_inspection() const { return true; } + + // Section on thread-local allocation buffers (TLABs) + // See CollectedHeap for semantics. + + virtual bool supports_tlab_allocation() const; + virtual size_t tlab_capacity(Thread* thr) const; + virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; + virtual HeapWord* allocate_new_tlab(size_t size); + + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + // Since G1's TLAB's may, on occasion, come from non-young regions + // as well. (Is there a flag controlling that? XXX) + return false; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + // At least until perm gen collection is also G1-ified, at + // which point this should return false. + return true; + } + + virtual bool allocs_are_zero_filled(); + + // The boundary between a "large" and "small" array of primitives, in + // words. 
+ virtual size_t large_typearray_limit(); + + // All popular objects are guaranteed to have addresses below this + // boundary. + HeapWord* popular_object_boundary() { + return _popular_object_boundary; + } + + // Declare the region as one that should be evacuated because its + // remembered set is too large. + void schedule_popular_region_evac(HeapRegion* r); + // If there is a popular region to evacuate it, remove it from the list + // and return it. + HeapRegion* popular_region_to_evac(); + // Evacuate the given popular region. + void evac_popular_region(HeapRegion* r); + + // Returns "true" iff the given word_size is "very large". + static bool isHumongous(size_t word_size) { + return word_size >= VeryLargeInWords; + } + + // Update mod union table with the set of dirty cards. + void updateModUnion(); + + // Set the mod union bits corresponding to the given memRegion. Note + // that this is always a safe operation, since it doesn't clear any + // bits. + void markModUnionRange(MemRegion mr); + + // Records the fact that a marking phase is no longer in progress. + void set_marking_complete() { + _mark_in_progress = false; + } + void set_marking_started() { + _mark_in_progress = true; + } + bool mark_in_progress() { + return _mark_in_progress; + } + + // Print the maximum heap capacity. + virtual size_t max_capacity() const; + + virtual jlong millis_since_last_gc(); + + // Perform any cleanup actions necessary before allowing a verification. + virtual void prepare_for_verify(); + + // Perform verification. + virtual void verify(bool allow_dirty, bool silent); + virtual void print() const; + virtual void print_on(outputStream* st) const; + + virtual void print_gc_threads_on(outputStream* st) const; + virtual void gc_threads_do(ThreadClosure* tc) const; + + // Override + void print_tracing_info() const; + + // If "addr" is a pointer into the (reserved?) heap, returns a positive + // number indicating the "arena" within the heap in which "addr" falls. + // Or else returns 0. + virtual int addr_to_arena_id(void* addr) const; + + // Convenience function to be used in situations where the heap type can be + // asserted to be this type. + static G1CollectedHeap* heap(); + + void empty_young_list(); + bool should_set_young_locked(); + + void set_region_short_lived_locked(HeapRegion* hr); + // add appropriate methods for any other surv rate groups + + void young_list_rs_length_sampling_init() { + _young_list->rs_length_sampling_init(); + } + bool young_list_rs_length_sampling_more() { + return _young_list->rs_length_sampling_more(); + } + void young_list_rs_length_sampling_next() { + _young_list->rs_length_sampling_next(); + } + size_t young_list_sampled_rs_lengths() { + return _young_list->sampled_rs_lengths(); + } + + size_t young_list_length() { return _young_list->length(); } + size_t young_list_scan_only_length() { + return _young_list->scan_only_length(); } + + HeapRegion* pop_region_from_young_list() { + return _young_list->pop_region(); + } + + HeapRegion* young_list_first_region() { + return _young_list->first_region(); + } + + // debugging + bool check_young_list_well_formed() { + return _young_list->check_list_well_formed(); + } + bool check_young_list_empty(bool ignore_scan_only_list, + bool check_sample = true); + + // *** Stuff related to concurrent marking. It's not clear to me that so + // many of these need to be public. + + // The functions below are helper functions that a subclass of + // "CollectedHeap" can use in the implementation of its virtual + // functions. 
+ // This performs a concurrent marking of the live objects in a + // bitmap off to the side. + void doConcurrentMark(); + + // This is called from the marksweep collector which then does + // a concurrent mark and verifies that the results agree with + // the stop the world marking. + void checkConcurrentMark(); + void do_sync_mark(); + + bool isMarkedPrev(oop obj) const; + bool isMarkedNext(oop obj) const; + + // Determine if an object is dead, given the object and also + // the region to which the object belongs. An object is dead + // iff a) it was not allocated since the last mark and b) it + // is not marked. + + bool is_obj_dead(const oop obj, const HeapRegion* hr) const { + return + !hr->obj_allocated_since_prev_marking(obj) && + !isMarkedPrev(obj); + } + + // This is used when copying an object to survivor space. + // If the object is marked live, then we mark the copy live. + // If the object is allocated since the start of this mark + // cycle, then we mark the copy live. + // If the object has been around since the previous mark + // phase, and hasn't been marked yet during this phase, + // then we don't mark it, we just wait for the + // current marking cycle to get to it. + + // This function returns true when an object has been + // around since the previous marking and hasn't yet + // been marked during this marking. + + bool is_obj_ill(const oop obj, const HeapRegion* hr) const { + return + !hr->obj_allocated_since_next_marking(obj) && + !isMarkedNext(obj); + } + + // Determine if an object is dead, given only the object itself. + // This will find the region to which the object belongs and + // then call the region version of the same function. + + // Added if it is in permanent gen it isn't dead. + // Added if it is NULL it isn't dead. + + bool is_obj_dead(oop obj) { + HeapRegion* hr = heap_region_containing(obj); + if (hr == NULL) { + if (Universe::heap()->is_in_permanent(obj)) + return false; + else if (obj == NULL) return false; + else return true; + } + else return is_obj_dead(obj, hr); + } + + bool is_obj_ill(oop obj) { + HeapRegion* hr = heap_region_containing(obj); + if (hr == NULL) { + if (Universe::heap()->is_in_permanent(obj)) + return false; + else if (obj == NULL) return false; + else return true; + } + else return is_obj_ill(obj, hr); + } + + // The following is just to alert the verification code + // that a full collection has occurred and that the + // remembered sets are no longer up to date. + bool _full_collection; + void set_full_collection() { _full_collection = true;} + void clear_full_collection() {_full_collection = false;} + bool full_collection() {return _full_collection;} + + ConcurrentMark* concurrent_mark() const { return _cm; } + ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; } + +public: + void stop_conc_gc_threads(); + + // + + double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); + void check_if_region_is_too_expensive(double predicted_time_ms); + size_t pending_card_num(); + size_t max_pending_card_num(); + size_t cards_scanned(); + + // + +protected: + size_t _max_heap_capacity; + +// debug_only(static void check_for_valid_allocation_state();) + +public: + // Temporary: call to mark things unimplemented for the G1 heap (e.g., + // MemoryService). In productization, we can make this assert false + // to catch such places (as well as searching for calls to this...) 
+ static void g1_unimplemented(); + +}; + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,91 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Inline functions for G1CollectedHeap + +inline HeapRegion* +G1CollectedHeap::heap_region_containing(const void* addr) const { + HeapRegion* hr = _hrs->addr_to_region(addr); + // hr can be null if addr in perm_gen + if (hr != NULL && hr->continuesHumongous()) { + hr = hr->humongous_start_region(); + } + return hr; +} + +inline HeapRegion* +G1CollectedHeap::heap_region_containing_raw(const void* addr) const { + HeapRegion* res = _hrs->addr_to_region(addr); + assert(res != NULL, "addr outside of heap?"); + return res; +} + +inline bool G1CollectedHeap::obj_in_cs(oop obj) { + HeapRegion* r = _hrs->addr_to_region(obj); + return r != NULL && r->in_collection_set(); +} + +inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size, + bool permit_collection_pause) { + HeapWord* res = NULL; + + assert( SafepointSynchronize::is_at_safepoint() || + Heap_lock->owned_by_self(), "pre-condition of the call" ); + + if (_cur_alloc_region != NULL) { + + // If this allocation causes a region to become non empty, + // then we need to update our free_regions count. + + if (_cur_alloc_region->is_empty()) { + res = _cur_alloc_region->allocate(word_size); + if (res != NULL) + _free_regions--; + } else { + res = _cur_alloc_region->allocate(word_size); + } + } + if (res != NULL) { + if (!SafepointSynchronize::is_at_safepoint()) { + assert( Heap_lock->owned_by_self(), "invariant" ); + Heap_lock->unlock(); + } + return res; + } + // attempt_allocation_slow will also unlock the heap lock when appropriate. 
+ return attempt_allocation_slow(word_size, permit_collection_pause); +} + +inline RefToScanQueue* G1CollectedHeap::task_queue(int i) { + return _task_queues->queue(i); +} + + +inline bool G1CollectedHeap::isMarkedPrev(oop obj) const { + return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj); +} + +inline bool G1CollectedHeap::isMarkedNext(oop obj) const { + return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,3159 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1CollectorPolicy.cpp.incl" + +#define PREDICTIONS_VERBOSE 0 + +// + +// Different defaults for different number of GC threads +// They were chosen by running GCOld and SPECjbb on debris with different +// numbers of GC threads and choosing them based on the results + +// all the same +static double rs_length_diff_defaults[] = { + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 +}; + +static double cost_per_card_ms_defaults[] = { + 0.01, 0.005, 0.005, 0.003, 0.003, 0.002, 0.002, 0.0015 +}; + +static double cost_per_scan_only_region_ms_defaults[] = { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +}; + +// all the same +static double fully_young_cards_per_entry_ratio_defaults[] = { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 +}; + +static double cost_per_entry_ms_defaults[] = { + 0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005 +}; + +static double cost_per_byte_ms_defaults[] = { + 0.00006, 0.00003, 0.00003, 0.000015, 0.000015, 0.00001, 0.00001, 0.000009 +}; + +// these should be pretty consistent +static double constant_other_time_ms_defaults[] = { + 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0 +}; + + +static double young_other_cost_per_region_ms_defaults[] = { + 0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1 +}; + +static double non_young_other_cost_per_region_ms_defaults[] = { + 1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30 +}; + +// + +G1CollectorPolicy::G1CollectorPolicy() : + _parallel_gc_threads((ParallelGCThreads > 0) ? 
ParallelGCThreads : 1), + _n_pauses(0), + _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _all_pause_times_ms(new NumberSeq()), + _stop_world_start(0.0), + _all_stop_world_times_ms(new NumberSeq()), + _all_yield_times_ms(new NumberSeq()), + + _all_mod_union_times_ms(new NumberSeq()), + + _non_pop_summary(new NonPopSummary()), + _pop_summary(new PopSummary()), + _non_pop_abandoned_summary(new NonPopAbandonedSummary()), + _pop_abandoned_summary(new PopAbandonedSummary()), + + _cur_clear_ct_time_ms(0.0), + + _region_num_young(0), + _region_num_tenured(0), + _prev_region_num_young(0), + _prev_region_num_tenured(0), + + _aux_num(10), + _all_aux_times_ms(new NumberSeq[_aux_num]), + _cur_aux_start_times_ms(new double[_aux_num]), + _cur_aux_times_ms(new double[_aux_num]), + _cur_aux_times_set(new bool[_aux_num]), + + _pop_compute_rc_start(0.0), + _pop_evac_start(0.0), + + _concurrent_mark_init_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), + + // + + _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _prev_collection_pause_end_ms(0.0), + _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)), + _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_scan_only_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _fully_young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), + _partially_young_cards_per_entry_ratio_seq( + new TruncatedSeq(TruncatedSeqLength)), + _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _partially_young_cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_byte_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)), + _cost_per_scan_only_region_ms_during_cm_seq(new TruncatedSeq(TruncatedSeqLength)), + _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _non_young_other_cost_per_region_ms_seq( + new TruncatedSeq(TruncatedSeqLength)), + + _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)), + _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)), + _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)), + + _pause_time_target_ms((double) G1MaxPauseTimeMS), + + // + + _in_young_gc_mode(false), + _full_young_gcs(true), + _full_young_pause_num(0), + _partial_young_pause_num(0), + + _during_marking(false), + _in_marking_window(false), + _in_marking_window_im(false), + + _known_garbage_ratio(0.0), + _known_garbage_bytes(0), + + _young_gc_eff_seq(new TruncatedSeq(TruncatedSeqLength)), + _target_pause_time_ms(-1.0), + + _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)), + + _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)), + _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)), + + _recent_avg_pause_time_ratio(0.0), + _num_markings(0), + 
_n_marks(0), + _n_pauses_at_mark_end(0), + + _all_full_gc_times_ms(new NumberSeq()), + + _conc_refine_enabled(0), + _conc_refine_zero_traversals(0), + _conc_refine_max_traversals(0), + _conc_refine_current_delta(G1ConcRefineInitialDelta), + + // G1PausesBtwnConcMark defaults to -1 + // so the hack is to do the cast QQQ FIXME + _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), + _n_marks_since_last_pause(0), + _conc_mark_initiated(false), + _should_initiate_conc_mark(false), + _should_revert_to_full_young_gcs(false), + _last_full_young_gc(false), + + _prev_collection_pause_used_at_end_bytes(0), + + _collection_set(NULL), +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived", + G1YoungSurvRateNumRegionsSummary)), + _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor", + G1YoungSurvRateNumRegionsSummary)) + // add here any more surv rate groups +{ + _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime()); + _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0; + + _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads]; + _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_only_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_only_regions_scanned = new double[_parallel_gc_threads]; + + _par_last_update_rs_start_times_ms = new double[_parallel_gc_threads]; + _par_last_update_rs_times_ms = new double[_parallel_gc_threads]; + _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; + + _par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; + _par_last_scan_new_refs_times_ms = new double[_parallel_gc_threads]; + + _par_last_obj_copy_times_ms = new double[_parallel_gc_threads]; + + _par_last_termination_times_ms = new double[_parallel_gc_threads]; + + // we store the data from the first pass during popularity pauses + _pop_par_last_update_rs_start_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_update_rs_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; + + _pop_par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads]; + _pop_par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; + + _pop_par_last_closure_app_times_ms = new double[_parallel_gc_threads]; + + // start conservatively + _expensive_region_limit_ms = 0.5 * (double) G1MaxPauseTimeMS; + + // + + int index; + if (ParallelGCThreads == 0) + index = 0; + else if (ParallelGCThreads > 8) + index = 7; + else + index = ParallelGCThreads - 1; + + _pending_card_diff_seq->add(0.0); + _rs_length_diff_seq->add(rs_length_diff_defaults[index]); + _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]); + _cost_per_scan_only_region_ms_seq->add( + cost_per_scan_only_region_ms_defaults[index]); + _fully_young_cards_per_entry_ratio_seq->add( + fully_young_cards_per_entry_ratio_defaults[index]); + _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]); + _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); + _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]); + _young_other_cost_per_region_ms_seq->add( + young_other_cost_per_region_ms_defaults[index]); + _non_young_other_cost_per_region_ms_seq->add( + 
non_young_other_cost_per_region_ms_defaults[index]); + + // + + double time_slice = (double) G1TimeSliceMS / 1000.0; + double max_gc_time = (double) G1MaxPauseTimeMS / 1000.0; + guarantee(max_gc_time < time_slice, + "Max GC time should not be greater than the time slice"); + _mmu_tracker = new G1MMUTrackerQueue(time_slice, max_gc_time); + _sigma = (double) G1ConfidencePerc / 100.0; + + // start conservatively (around 50ms is about right) + _concurrent_mark_init_times_ms->add(0.05); + _concurrent_mark_remark_times_ms->add(0.05); + _concurrent_mark_cleanup_times_ms->add(0.20); + _tenuring_threshold = MaxTenuringThreshold; + + initialize_all(); +} + +// Increment "i", mod "len" +static void inc_mod(int& i, int len) { + i++; if (i == len) i = 0; +} + +void G1CollectorPolicy::initialize_flags() { + set_min_alignment(HeapRegion::GrainBytes); + set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name())); + CollectorPolicy::initialize_flags(); +} + +void G1CollectorPolicy::init() { + // Set aside an initial future to_space. + _g1 = G1CollectedHeap::heap(); + size_t regions = Universe::heap()->capacity() / HeapRegion::GrainBytes; + + assert(Heap_lock->owned_by_self(), "Locking discipline."); + + if (G1SteadyStateUsed < 50) { + vm_exit_during_initialization("G1SteadyStateUsed must be at least 50%."); + } + if (UseConcMarkSweepGC) { + vm_exit_during_initialization("-XX:+UseG1GC is incompatible with " + "-XX:+UseConcMarkSweepGC."); + } + + if (G1Gen) { + _in_young_gc_mode = true; + + if (G1YoungGenSize == 0) { + set_adaptive_young_list_length(true); + _young_list_fixed_length = 0; + } else { + set_adaptive_young_list_length(false); + _young_list_fixed_length = (G1YoungGenSize / HeapRegion::GrainBytes); + } + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = 0; + calculate_young_list_min_length(); + guarantee( _young_list_min_length == 0, "invariant, not enough info" ); + calculate_young_list_target_config(); + } else { + _young_list_fixed_length = 0; + _in_young_gc_mode = false; + } +} + +void G1CollectorPolicy::calculate_young_list_min_length() { + _young_list_min_length = 0; + + if (!adaptive_young_list_length()) + return; + + if (_alloc_rate_ms_seq->num() > 3) { + double now_sec = os::elapsedTime(); + double when_ms = _mmu_tracker->when_max_gc_sec(now_sec) * 1000.0; + double alloc_rate_ms = predict_alloc_rate_ms(); + int min_regions = (int) ceil(alloc_rate_ms * when_ms); + int current_region_num = (int) _g1->young_list_length(); + _young_list_min_length = min_regions + current_region_num; + } +} + +void G1CollectorPolicy::calculate_young_list_target_config() { + if (adaptive_young_list_length()) { + size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); + calculate_young_list_target_config(rs_lengths); + } else { + if (full_young_gcs()) + _young_list_target_length = _young_list_fixed_length; + else + _young_list_target_length = _young_list_fixed_length / 2; + _young_list_target_length = MAX2(_young_list_target_length, (size_t)1); + size_t so_length = calculate_optimal_so_length(_young_list_target_length); + guarantee( so_length < _young_list_target_length, "invariant" ); + _young_list_so_prefix_length = so_length; + } +} + +// This method calculate the optimal scan-only set for a fixed young +// gen size. I couldn't work out how to reuse the more elaborate one, +// i.e. 
calculate_young_list_target_config(rs_length), as the loops are +// fundamentally different (the other one finds a config for different +// S-O lengths, whereas here we need to do the opposite). +size_t G1CollectorPolicy::calculate_optimal_so_length( + size_t young_list_length) { + if (!G1UseScanOnlyPrefix) + return 0; + + if (_all_pause_times_ms->num() < 3) { + // we won't use a scan-only set at the beginning to allow the rest + // of the predictors to warm up + return 0; + } + + if (_cost_per_scan_only_region_ms_seq->num() < 3) { + // then, we'll only set the S-O set to 1 for a little bit of time, + // to get enough information on the scanning cost + return 1; + } + + size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); + size_t rs_lengths = (size_t) get_new_prediction(_rs_lengths_seq); + size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); + size_t scanned_cards; + if (full_young_gcs()) + scanned_cards = predict_young_card_num(adj_rs_lengths); + else + scanned_cards = predict_non_young_card_num(adj_rs_lengths); + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, + scanned_cards); + + size_t so_length = 0; + double max_gc_eff = 0.0; + for (size_t i = 0; i < young_list_length; ++i) { + double gc_eff = 0.0; + double pause_time_ms = 0.0; + predict_gc_eff(young_list_length, i, base_time_ms, + &gc_eff, &pause_time_ms); + if (gc_eff > max_gc_eff) { + max_gc_eff = gc_eff; + so_length = i; + } + } + + // set it to 95% of the optimal to make sure we sample the "area" + // around the optimal length to get up-to-date survival rate data + return so_length * 950 / 1000; +} + +// This is a really cool piece of code! It finds the best +// target configuration (young length / scan-only prefix length) so +// that GC efficiency is maximized and that we also meet a pause +// time. It's a triple nested loop. These loops are explained below +// from the inside-out :-) +// +// (a) The innermost loop will try to find the optimal young length +// for a fixed S-O length. It uses a binary search to speed up the +// process. We assume that, for a fixed S-O length, as we add more +// young regions to the CSet, the GC efficiency will only go up (I'll +// skip the proof). So, using a binary search to optimize this process +// makes perfect sense. +// +// (b) The middle loop will fix the S-O length before calling the +// innermost one. It will vary it between two parameters, increasing +// it by a given increment. +// +// (c) The outermost loop will call the middle loop three times. +// (1) The first time it will explore all possible S-O length values +// from 0 to as large as it can get, using a coarse increment (to +// quickly "home in" to where the optimal seems to be). +// (2) The second time it will explore the values around the optimal +// that was found by the first iteration using a fine increment. +// (3) Once the optimal config has been determined by the second +// iteration, we'll redo the calculation, but setting the S-O length +// to 95% of the optimal to make sure we sample the "area" +// around the optimal length to get up-to-date survival rate data +// +// Termination conditions for the iterations are several: the pause +// time is over the limit, we do not have enough to-space, etc. 
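The comment above rests on one monotonicity observation for the innermost step (a): for a fixed scan-only prefix, if a given young length already misses the pause target or the available to-space, every longer length does too, so the largest feasible length can be located by binary search. A standalone, hedged sketch of that search follows; the helper name and the fits() predicate are hypothetical and merely stand in for the predict_gc_eff() feasibility check used by the real code below.

// Illustrative sketch only: find the largest young length in [lo, hi] that
// still "fits" the pause target, assuming fits(lo) is true and fits() is
// monotone (true up to some length, false for everything longer).
static size_t max_feasible_young_length(size_t lo, size_t hi,
                                        bool (*fits)(size_t)) {
  while (lo < hi) {
    size_t mid = lo + (hi - lo + 1) / 2;  // round up so each step makes progress
    if (fits(mid)) {
      lo = mid;        // mid still meets the pause target: answer is >= mid
    } else {
      hi = mid - 1;    // mid is too expensive: answer is < mid
    }
  }
  return lo;           // largest young length that still meets the target
}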
+ +void G1CollectorPolicy::calculate_young_list_target_config(size_t rs_lengths) { + guarantee( adaptive_young_list_length(), "pre-condition" ); + + double start_time_sec = os::elapsedTime(); + size_t min_reserve_perc = MAX2((size_t)2, (size_t)G1MinReservePerc); + min_reserve_perc = MIN2((size_t) 50, min_reserve_perc); + size_t reserve_regions = + (size_t) ((double) min_reserve_perc * (double) _g1->n_regions() / 100.0); + + if (full_young_gcs() && _free_regions_at_end_of_collection > 0) { + // we are in fully-young mode and there are free regions in the heap + + size_t min_so_length = 0; + size_t max_so_length = 0; + + if (G1UseScanOnlyPrefix) { + if (_all_pause_times_ms->num() < 3) { + // we won't use a scan-only set at the beginning to allow the rest + // of the predictors to warm up + min_so_length = 0; + max_so_length = 0; + } else if (_cost_per_scan_only_region_ms_seq->num() < 3) { + // then, we'll only set the S-O set to 1 for a little bit of time, + // to get enough information on the scanning cost + min_so_length = 1; + max_so_length = 1; + } else if (_in_marking_window || _last_full_young_gc) { + // no S-O prefix during a marking phase either, as at the end + // of the marking phase we'll have to use a very small young + // length target to fill up the rest of the CSet with + // non-young regions and, if we have lots of scan-only regions + // left-over, we will not be able to add any more non-young + // regions. + min_so_length = 0; + max_so_length = 0; + } else { + // this is the common case; we'll never reach the maximum, we + // one of the end conditions will fire well before that + // (hopefully!) + min_so_length = 0; + max_so_length = _free_regions_at_end_of_collection - 1; + } + } else { + // no S-O prefix, as the switch is not set, but we still need to + // do one iteration to calculate the best young target that + // meets the pause time; this way we reuse the same code instead + // of replicating it + min_so_length = 0; + max_so_length = 0; + } + + double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + size_t pending_cards = (size_t) get_new_prediction(_pending_cards_seq); + size_t adj_rs_lengths = rs_lengths + predict_rs_length_diff(); + size_t scanned_cards; + if (full_young_gcs()) + scanned_cards = predict_young_card_num(adj_rs_lengths); + else + scanned_cards = predict_non_young_card_num(adj_rs_lengths); + // calculate this once, so that we don't have to recalculate it in + // the innermost loop + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, + scanned_cards); + + // the result + size_t final_young_length = 0; + size_t final_so_length = 0; + double final_gc_eff = 0.0; + // we'll also keep track of how many times we go into the inner loop + // this is for profiling reasons + size_t calculations = 0; + + // this determines which of the three iterations the outer loop is in + typedef enum { + pass_type_coarse, + pass_type_fine, + pass_type_final + } pass_type_t; + + // range of the outer loop's iteration + size_t from_so_length = min_so_length; + size_t to_so_length = max_so_length; + guarantee( from_so_length <= to_so_length, "invariant" ); + + // this will keep the S-O length that's found by the second + // iteration of the outer loop; we'll keep it just in case the third + // iteration fails to find something + size_t fine_so_length = 0; + + // the increment step for the coarse (first) iteration + size_t so_coarse_increments = 5; + + // the common case, we'll start with the coarse iteration + pass_type_t pass = pass_type_coarse; + 
size_t so_length_incr = so_coarse_increments;
+
+    if (from_so_length == to_so_length) {
+      // no point in doing the coarse iteration, we'll go directly into
+      // the fine one (we're essentially trying to find the optimal young
+      // length for a fixed S-O length).
+      so_length_incr = 1;
+      pass = pass_type_final;
+    } else if (to_so_length - from_so_length < 3 * so_coarse_increments) {
+      // again, the range is too short so no point in doing the coarse
+      // iteration either
+      so_length_incr = 1;
+      pass = pass_type_fine;
+    }
+
+    bool done = false;
+    // this is the outermost loop
+    while (!done) {
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT
+                             ", incr " SIZE_FORMAT ", pass %s",
+                             from_so_length, to_so_length, so_length_incr,
+                             (pass == pass_type_coarse) ? "coarse" :
+                             (pass == pass_type_fine) ? "fine" : "final");
+#endif // 0
+
+      size_t so_length = from_so_length;
+      size_t init_free_regions =
+        MAX2((size_t)0,
+             _free_regions_at_end_of_collection +
+             _scan_only_regions_at_end_of_collection - reserve_regions);
+
+      // this determines whether a configuration was found
+      bool gc_eff_set = false;
+      // this is the middle loop
+      while (so_length <= to_so_length) {
+        // base time, which excludes region-related time; again we
+        // calculate it once to avoid recalculating it in the
+        // innermost loop
+        double base_time_with_so_ms =
+          base_time_ms + predict_scan_only_time_ms(so_length);
+        // it's already over the pause target, go around
+        if (base_time_with_so_ms > target_pause_time_ms)
+          break;
+
+        size_t starting_young_length = so_length+1;
+
+        // we make sure that the shortest young length that makes sense
+        // (one more than the S-O length) is feasible
+        size_t min_young_length = starting_young_length;
+        double min_gc_eff;
+        bool min_ok;
+        ++calculations;
+        min_ok = predict_gc_eff(min_young_length, so_length,
+                                base_time_with_so_ms,
+                                init_free_regions, target_pause_time_ms,
+                                &min_gc_eff);
+
+        if (min_ok) {
+          // the shortest young length is indeed feasible; we'll now
+          // set up the max young length and we'll do a binary search
+          // between min_young_length and max_young_length
+          size_t max_young_length = _free_regions_at_end_of_collection - 1;
+          double max_gc_eff = 0.0;
+          bool max_ok = false;
+
+          // the innermost loop! (finally!)
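+          // To sketch how the search below behaves: min_young_length always
+          // holds a young length that is known to be feasible. Each pass
+          // probes max_young_length; if that is feasible too, the lower
+          // bound jumps up to it and the window shifts further up. Either
+          // way the gap between the two bounds is halved, so the loop stops
+          // once the bounds meet.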
+          while (max_young_length > min_young_length) {
+            // we'll make sure that min_young_length is always at a
+            // feasible config
+            guarantee( min_ok, "invariant" );
+
+            ++calculations;
+            max_ok = predict_gc_eff(max_young_length, so_length,
+                                    base_time_with_so_ms,
+                                    init_free_regions, target_pause_time_ms,
+                                    &max_gc_eff);
+
+            size_t diff = (max_young_length - min_young_length) / 2;
+            if (max_ok) {
+              min_young_length = max_young_length;
+              min_gc_eff = max_gc_eff;
+              min_ok = true;
+            }
+            max_young_length = min_young_length + diff;
+          }
+
+          // the innermost loop found a config
+          guarantee( min_ok, "invariant" );
+          if (min_gc_eff > final_gc_eff) {
+            // it's the best config so far, so we'll keep it
+            final_gc_eff = min_gc_eff;
+            final_young_length = min_young_length;
+            final_so_length = so_length;
+            gc_eff_set = true;
+          }
+        }
+
+        // increment the fixed S-O length and go around
+        so_length += so_length_incr;
+      }
+
+      // this is the end of the outermost loop and we need to decide
+      // what to do during the next iteration
+      if (pass == pass_type_coarse) {
+        // we just did the coarse pass (first iteration)
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config so we'll just bail out; of
+          // course, it might be the case that we missed it; but I'd say
+          // it's a bit unlikely
+          done = true;
+        else {
+          // We did find a feasible config with optimal GC eff during
+          // the first pass. So in the second pass we'll only consider the
+          // S-O lengths around that config with a fine increment.
+
+          guarantee( so_length_incr == so_coarse_increments, "invariant" );
+          guarantee( final_so_length >= min_so_length, "invariant" );
+
+#if 0
+          // leave this in for debugging, just in case
+          gclog_or_tty->print_cr(" coarse pass: SO length " SIZE_FORMAT,
+                                 final_so_length);
+#endif // 0
+
+          from_so_length =
+            (final_so_length - min_so_length > so_coarse_increments) ?
+            final_so_length - so_coarse_increments + 1 : min_so_length;
+          to_so_length =
+            (max_so_length - final_so_length > so_coarse_increments) ?
+            final_so_length + so_coarse_increments - 1 : max_so_length;
+
+          pass = pass_type_fine;
+          so_length_incr = 1;
+        }
+      } else if (pass == pass_type_fine) {
+        // we just finished the second pass
+
+        if (!gc_eff_set) {
+          // we didn't find a feasible config (yes, it's possible;
+          // notice that, sometimes, we go directly into the fine
+          // iteration and skip the coarse one) so we bail out
+          done = true;
+        } else {
+          // We did find a feasible config with optimal GC eff
+          guarantee( so_length_incr == 1, "invariant" );
+
+          if (final_so_length == 0) {
+            // The config has an empty S-O set, so we'll just bail out
+            done = true;
+          } else {
+            // we'll go around once more, setting the S-O length to 95%
+            // of the optimal
+            size_t new_so_length = 950 * final_so_length / 1000;
+
+#if 0
+            // leave this in for debugging, just in case
+            gclog_or_tty->print_cr(" fine pass: SO length " SIZE_FORMAT
+                                   ", setting it to " SIZE_FORMAT,
+                                   final_so_length, new_so_length);
+#endif // 0
+
+            from_so_length = new_so_length;
+            to_so_length = new_so_length;
+            fine_so_length = final_so_length;
+
+            pass = pass_type_final;
+          }
+        }
+      } else if (pass == pass_type_final) {
+        // we just finished the final (third) pass
+
+        if (!gc_eff_set)
+          // we didn't find a feasible config, so we'll just use the one
+          // we found during the second pass, which we saved
+          final_so_length = fine_so_length;
+
+        // and we're done!
+        done = true;
+      } else {
+        guarantee( false, "should never reach here" );
+      }
+
+      // we now go around the outermost loop
+    }
+
+    // we should have at least one region in the target young length
+    _young_list_target_length = MAX2((size_t) 1, final_young_length);
+    if (final_so_length >= final_young_length)
+      // and we need to ensure that the S-O length is not greater than
+      // the target young length (this is being a bit careful)
+      final_so_length = 0;
+    _young_list_so_prefix_length = final_so_length;
+    guarantee( !_in_marking_window || !_last_full_young_gc ||
+               _young_list_so_prefix_length == 0, "invariant" );
+
+    // let's keep an eye on how long we spend on this calculation
+    // right now, I assume that we'll print it when we need it; we
+    // should really add it to the breakdown of a pause
+    double end_time_sec = os::elapsedTime();
+    double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT
+                           ", SO = " SIZE_FORMAT ", "
+                           "elapsed %1.2lf ms, calcs: " SIZE_FORMAT " (%s%s) "
+                           SIZE_FORMAT SIZE_FORMAT,
+                           target_pause_time_ms,
+                           _young_list_target_length - _young_list_so_prefix_length,
+                           _young_list_so_prefix_length,
+                           elapsed_time_ms,
+                           calculations,
+                           full_young_gcs() ? "full" : "partial",
+                           should_initiate_conc_mark() ? " i-m" : "",
+                           in_marking_window(),
+                           in_marking_window_im());
+#endif // 0
+
+    if (_young_list_target_length < _young_list_min_length) {
+      // bummer; this means that, if we do a pause when the optimal
+      // config dictates, we'll violate the pause spacing target (the
+      // min length was calculated based on the application's current
+      // alloc rate);
+
+      // so, we have to bite the bullet, and allocate the minimum
+      // number. We'll violate our target, but we just can't meet it.
+
+      size_t so_length = 0;
+      // a note further up explains why we do not want an S-O length
+      // during marking
+      if (!_in_marking_window && !_last_full_young_gc)
+        // but we can still try to see whether we can find an optimal
+        // S-O length
+        so_length = calculate_optimal_so_length(_young_list_min_length);
+
+#if 0
+      // leave this in for debugging, just in case
+      gclog_or_tty->print_cr("adjusted target length from "
+                             SIZE_FORMAT " to " SIZE_FORMAT
+                             ", SO " SIZE_FORMAT,
+                             _young_list_target_length, _young_list_min_length,
+                             so_length);
+#endif // 0
+
+      _young_list_target_length =
+        MAX2(_young_list_min_length, (size_t)1);
+      _young_list_so_prefix_length = so_length;
+    }
+  } else {
+    // we are in a partially-young mode or we've run out of regions (due
+    // to evacuation failure)
+
+#if 0
+    // leave this in for debugging, just in case
+    gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
+                           ", SO " SIZE_FORMAT,
+                           _young_list_min_length, 0);
+#endif // 0
+
+    // we'll do the pause as soon as possible and with no S-O prefix
+    // (see above for the reasons behind the latter)
+    _young_list_target_length =
+      MAX2(_young_list_min_length, (size_t) 1);
+    _young_list_so_prefix_length = 0;
+  }
+
+  _rs_lengths_prediction = rs_lengths;
+}
+
+// This is used by: calculate_optimal_so_length(length).
It returns +// the GC eff and predicted pause time for a particular config +void +G1CollectorPolicy::predict_gc_eff(size_t young_length, + size_t so_length, + double base_time_ms, + double* ret_gc_eff, + double* ret_pause_time_ms) { + double so_time_ms = predict_scan_only_time_ms(so_length); + double accum_surv_rate_adj = 0.0; + if (so_length > 0) + accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1)); + double accum_surv_rate = + accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj; + size_t bytes_to_copy = + (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); + double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy); + double young_other_time_ms = + predict_young_other_time_ms(young_length - so_length); + double pause_time_ms = + base_time_ms + so_time_ms + copy_time_ms + young_other_time_ms; + size_t reclaimed_bytes = + (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy; + double gc_eff = (double) reclaimed_bytes / pause_time_ms; + + *ret_gc_eff = gc_eff; + *ret_pause_time_ms = pause_time_ms; +} + +// This is used by: calculate_young_list_target_config(rs_length). It +// returns the GC eff of a particular config. It returns false if that +// config violates any of the end conditions of the search in the +// calling method, or true upon success. The end conditions were put +// here since it's called twice and it was best not to replicate them +// in the caller. Also, passing the parameteres avoids having to +// recalculate them in the innermost loop. +bool +G1CollectorPolicy::predict_gc_eff(size_t young_length, + size_t so_length, + double base_time_with_so_ms, + size_t init_free_regions, + double target_pause_time_ms, + double* ret_gc_eff) { + *ret_gc_eff = 0.0; + + if (young_length >= init_free_regions) + // end condition 1: not enough space for the young regions + return false; + + double accum_surv_rate_adj = 0.0; + if (so_length > 0) + accum_surv_rate_adj = accum_yg_surv_rate_pred((int)(so_length - 1)); + double accum_surv_rate = + accum_yg_surv_rate_pred((int)(young_length - 1)) - accum_surv_rate_adj; + size_t bytes_to_copy = + (size_t) (accum_surv_rate * (double) HeapRegion::GrainBytes); + double copy_time_ms = predict_object_copy_time_ms(bytes_to_copy); + double young_other_time_ms = + predict_young_other_time_ms(young_length - so_length); + double pause_time_ms = + base_time_with_so_ms + copy_time_ms + young_other_time_ms; + + if (pause_time_ms > target_pause_time_ms) + // end condition 2: over the target pause time + return false; + + size_t reclaimed_bytes = + (young_length - so_length) * HeapRegion::GrainBytes - bytes_to_copy; + size_t free_bytes = + (init_free_regions - young_length) * HeapRegion::GrainBytes; + + if ((2.0 + sigma()) * (double) bytes_to_copy > (double) free_bytes) + // end condition 3: out of to-space (conservatively) + return false; + + // success! 
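+  // (GC efficiency here means bytes reclaimed per millisecond of predicted
+  // pause time; with illustrative numbers, reclaiming 8M bytes in a 40 ms
+  // predicted pause scores roughly 200K bytes/ms.)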
+ double gc_eff = (double) reclaimed_bytes / pause_time_ms; + *ret_gc_eff = gc_eff; + + return true; +} + +void G1CollectorPolicy::check_prediction_validity() { + guarantee( adaptive_young_list_length(), "should not call this otherwise" ); + + size_t rs_lengths = _g1->young_list_sampled_rs_lengths(); + if (rs_lengths > _rs_lengths_prediction) { + // add 10% to avoid having to recalculate often + size_t rs_lengths_prediction = rs_lengths * 1100 / 1000; + calculate_young_list_target_config(rs_lengths_prediction); + } +} + +HeapWord* G1CollectorPolicy::mem_allocate_work(size_t size, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded) { + guarantee(false, "Not using this policy feature yet."); + return NULL; +} + +// This method controls how a collector handles one or more +// of its generations being fully allocated. +HeapWord* G1CollectorPolicy::satisfy_failed_allocation(size_t size, + bool is_tlab) { + guarantee(false, "Not using this policy feature yet."); + return NULL; +} + + +#ifndef PRODUCT +bool G1CollectorPolicy::verify_young_ages() { + HeapRegion* head = _g1->young_list_first_region(); + return + verify_young_ages(head, _short_lived_surv_rate_group); + // also call verify_young_ages on any additional surv rate groups +} + +bool +G1CollectorPolicy::verify_young_ages(HeapRegion* head, + SurvRateGroup *surv_rate_group) { + guarantee( surv_rate_group != NULL, "pre-condition" ); + + const char* name = surv_rate_group->name(); + bool ret = true; + int prev_age = -1; + + for (HeapRegion* curr = head; + curr != NULL; + curr = curr->get_next_young_region()) { + SurvRateGroup* group = curr->surv_rate_group(); + if (group == NULL && !curr->is_survivor()) { + gclog_or_tty->print_cr("## %s: encountered NULL surv_rate_group", name); + ret = false; + } + + if (surv_rate_group == group) { + int age = curr->age_in_surv_rate_group(); + + if (age < 0) { + gclog_or_tty->print_cr("## %s: encountered negative age", name); + ret = false; + } + + if (age <= prev_age) { + gclog_or_tty->print_cr("## %s: region ages are not strictly increasing " + "(%d, %d)", name, age, prev_age); + ret = false; + } + prev_age = age; + } + } + + return ret; +} +#endif // PRODUCT + +void G1CollectorPolicy::record_full_collection_start() { + _cur_collection_start_sec = os::elapsedTime(); + // Release the future to-space so that it is available for compaction into. + _g1->set_full_collection(); +} + +void G1CollectorPolicy::record_full_collection_end() { + // Consider this like a collection pause for the purposes of allocation + // since last pause. + double end_sec = os::elapsedTime(); + double full_gc_time_sec = end_sec - _cur_collection_start_sec; + double full_gc_time_ms = full_gc_time_sec * 1000.0; + + checkpoint_conc_overhead(); + + _all_full_gc_times_ms->add(full_gc_time_ms); + + update_recent_gc_times(end_sec, full_gc_time_sec); + + _g1->clear_full_collection(); + + // "Nuke" the heuristics that control the fully/partially young GC + // transitions and make sure we start with fully young GCs after the + // Full GC. 
+ set_full_young_gcs(true); + _last_full_young_gc = false; + _should_revert_to_full_young_gcs = false; + _should_initiate_conc_mark = false; + _known_garbage_bytes = 0; + _known_garbage_ratio = 0.0; + _in_marking_window = false; + _in_marking_window_im = false; + + _short_lived_surv_rate_group->record_scan_only_prefix(0); + _short_lived_surv_rate_group->start_adding_regions(); + // also call this on any additional surv rate groups + + _prev_region_num_young = _region_num_young; + _prev_region_num_tenured = _region_num_tenured; + + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = 0; + calculate_young_list_min_length(); + calculate_young_list_target_config(); + } + +void G1CollectorPolicy::record_pop_compute_rc_start() { + _pop_compute_rc_start = os::elapsedTime(); +} +void G1CollectorPolicy::record_pop_compute_rc_end() { + double ms = (os::elapsedTime() - _pop_compute_rc_start)*1000.0; + _cur_popular_compute_rc_time_ms = ms; + _pop_compute_rc_start = 0.0; +} +void G1CollectorPolicy::record_pop_evac_start() { + _pop_evac_start = os::elapsedTime(); +} +void G1CollectorPolicy::record_pop_evac_end() { + double ms = (os::elapsedTime() - _pop_evac_start)*1000.0; + _cur_popular_evac_time_ms = ms; + _pop_evac_start = 0.0; +} + +void G1CollectorPolicy::record_before_bytes(size_t bytes) { + _bytes_in_to_space_before_gc += bytes; +} + +void G1CollectorPolicy::record_after_bytes(size_t bytes) { + _bytes_in_to_space_after_gc += bytes; +} + +void G1CollectorPolicy::record_stop_world_start() { + _stop_world_start = os::elapsedTime(); +} + +void G1CollectorPolicy::record_collection_pause_start(double start_time_sec, + size_t start_used) { + if (PrintGCDetails) { + gclog_or_tty->stamp(PrintGCTimeStamps); + gclog_or_tty->print("[GC pause"); + if (in_young_gc_mode()) + gclog_or_tty->print(" (%s)", full_young_gcs() ? 
"young" : "partial"); + } + + assert(_g1->used_regions() == _g1->recalculate_used_regions(), + "sanity"); + + double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0; + _all_stop_world_times_ms->add(s_w_t_ms); + _stop_world_start = 0.0; + + _cur_collection_start_sec = start_time_sec; + _cur_collection_pause_used_at_start_bytes = start_used; + _cur_collection_pause_used_regions_at_start = _g1->used_regions(); + _pending_cards = _g1->pending_card_num(); + _max_pending_cards = _g1->max_pending_card_num(); + + _bytes_in_to_space_before_gc = 0; + _bytes_in_to_space_after_gc = 0; + _bytes_in_collection_set_before_gc = 0; + +#ifdef DEBUG + // initialise these to something well known so that we can spot + // if they are not set properly + + for (int i = 0; i < _parallel_gc_threads; ++i) { + _par_last_ext_root_scan_times_ms[i] = -666.0; + _par_last_mark_stack_scan_times_ms[i] = -666.0; + _par_last_scan_only_times_ms[i] = -666.0; + _par_last_scan_only_regions_scanned[i] = -666.0; + _par_last_update_rs_start_times_ms[i] = -666.0; + _par_last_update_rs_times_ms[i] = -666.0; + _par_last_update_rs_processed_buffers[i] = -666.0; + _par_last_scan_rs_start_times_ms[i] = -666.0; + _par_last_scan_rs_times_ms[i] = -666.0; + _par_last_scan_new_refs_times_ms[i] = -666.0; + _par_last_obj_copy_times_ms[i] = -666.0; + _par_last_termination_times_ms[i] = -666.0; + + _pop_par_last_update_rs_start_times_ms[i] = -666.0; + _pop_par_last_update_rs_times_ms[i] = -666.0; + _pop_par_last_update_rs_processed_buffers[i] = -666.0; + _pop_par_last_scan_rs_start_times_ms[i] = -666.0; + _pop_par_last_scan_rs_times_ms[i] = -666.0; + _pop_par_last_closure_app_times_ms[i] = -666.0; + } +#endif + + for (int i = 0; i < _aux_num; ++i) { + _cur_aux_times_ms[i] = 0.0; + _cur_aux_times_set[i] = false; + } + + _satb_drain_time_set = false; + _last_satb_drain_processed_buffers = -1; + + if (in_young_gc_mode()) + _last_young_gc_full = false; + + + // do that for any other surv rate groups + _short_lived_surv_rate_group->stop_adding_regions(); + size_t short_lived_so_length = _young_list_so_prefix_length; + _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length); + tag_scan_only(short_lived_so_length); + + assert( verify_young_ages(), "region age verification" ); +} + +void G1CollectorPolicy::tag_scan_only(size_t short_lived_scan_only_length) { + // done in a way that it can be extended for other surv rate groups too... 
+ + HeapRegion* head = _g1->young_list_first_region(); + bool finished_short_lived = (short_lived_scan_only_length == 0); + + if (finished_short_lived) + return; + + for (HeapRegion* curr = head; + curr != NULL; + curr = curr->get_next_young_region()) { + SurvRateGroup* surv_rate_group = curr->surv_rate_group(); + int age = curr->age_in_surv_rate_group(); + + if (surv_rate_group == _short_lived_surv_rate_group) { + if ((size_t)age < short_lived_scan_only_length) + curr->set_scan_only(); + else + finished_short_lived = true; + } + + + if (finished_short_lived) + return; + } + + guarantee( false, "we should never reach here" ); +} + +void G1CollectorPolicy::record_popular_pause_preamble_start() { + _cur_popular_preamble_start_ms = os::elapsedTime() * 1000.0; +} + +void G1CollectorPolicy::record_popular_pause_preamble_end() { + _cur_popular_preamble_time_ms = + (os::elapsedTime() * 1000.0) - _cur_popular_preamble_start_ms; + + // copy the recorded statistics of the first pass to temporary arrays + for (int i = 0; i < _parallel_gc_threads; ++i) { + _pop_par_last_update_rs_start_times_ms[i] = _par_last_update_rs_start_times_ms[i]; + _pop_par_last_update_rs_times_ms[i] = _par_last_update_rs_times_ms[i]; + _pop_par_last_update_rs_processed_buffers[i] = _par_last_update_rs_processed_buffers[i]; + _pop_par_last_scan_rs_start_times_ms[i] = _par_last_scan_rs_start_times_ms[i]; + _pop_par_last_scan_rs_times_ms[i] = _par_last_scan_rs_times_ms[i]; + _pop_par_last_closure_app_times_ms[i] = _par_last_obj_copy_times_ms[i]; + } +} + +void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) { + _mark_closure_time_ms = mark_closure_time_ms; +} + +void G1CollectorPolicy::record_concurrent_mark_init_start() { + _mark_init_start_sec = os::elapsedTime(); + guarantee(!in_young_gc_mode(), "should not do be here in young GC mode"); +} + +void G1CollectorPolicy::record_concurrent_mark_init_end_pre(double + mark_init_elapsed_time_ms) { + _during_marking = true; + _should_initiate_conc_mark = false; + _cur_mark_stop_world_time_ms = mark_init_elapsed_time_ms; +} + +void G1CollectorPolicy::record_concurrent_mark_init_end() { + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_init_start_sec) * 1000.0; + _concurrent_mark_init_times_ms->add(elapsed_time_ms); + checkpoint_conc_overhead(); + record_concurrent_mark_init_end_pre(elapsed_time_ms); + + _mmu_tracker->add_pause(_mark_init_start_sec, end_time_sec, true); +} + +void G1CollectorPolicy::record_concurrent_mark_remark_start() { + _mark_remark_start_sec = os::elapsedTime(); + _during_marking = false; +} + +void G1CollectorPolicy::record_concurrent_mark_remark_end() { + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_remark_start_sec)*1000.0; + checkpoint_conc_overhead(); + _concurrent_mark_remark_times_ms->add(elapsed_time_ms); + _cur_mark_stop_world_time_ms += elapsed_time_ms; + _prev_collection_pause_end_ms += elapsed_time_ms; + + _mmu_tracker->add_pause(_mark_remark_start_sec, end_time_sec, true); +} + +void G1CollectorPolicy::record_concurrent_mark_cleanup_start() { + _mark_cleanup_start_sec = os::elapsedTime(); +} + +void +G1CollectorPolicy::record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes) { + record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes); + record_concurrent_mark_cleanup_end_work2(); +} + +void +G1CollectorPolicy:: +record_concurrent_mark_cleanup_end_work1(size_t freed_bytes, + size_t max_live_bytes) { + if 
(_n_marks < 2) _n_marks++; + if (G1PolicyVerbose > 0) + gclog_or_tty->print_cr("At end of marking, max_live is " SIZE_FORMAT " MB " + " (of " SIZE_FORMAT " MB heap).", + max_live_bytes/M, _g1->capacity()/M); +} + +// The important thing about this is that it includes "os::elapsedTime". +void G1CollectorPolicy::record_concurrent_mark_cleanup_end_work2() { + checkpoint_conc_overhead(); + double end_time_sec = os::elapsedTime(); + double elapsed_time_ms = (end_time_sec - _mark_cleanup_start_sec)*1000.0; + _concurrent_mark_cleanup_times_ms->add(elapsed_time_ms); + _cur_mark_stop_world_time_ms += elapsed_time_ms; + _prev_collection_pause_end_ms += elapsed_time_ms; + + _mmu_tracker->add_pause(_mark_cleanup_start_sec, end_time_sec, true); + + _num_markings++; + + // We did a marking, so reset the "since_last_mark" variables. + double considerConcMarkCost = 1.0; + // If there are available processors, concurrent activity is free... + if (Threads::number_of_non_daemon_threads() * 2 < + os::active_processor_count()) { + considerConcMarkCost = 0.0; + } + _n_pauses_at_mark_end = _n_pauses; + _n_marks_since_last_pause++; + _conc_mark_initiated = false; +} + +void +G1CollectorPolicy::record_concurrent_mark_cleanup_completed() { + if (in_young_gc_mode()) { + _should_revert_to_full_young_gcs = false; + _last_full_young_gc = true; + _in_marking_window = false; + if (adaptive_young_list_length()) + calculate_young_list_target_config(); + } +} + +void G1CollectorPolicy::record_concurrent_pause() { + if (_stop_world_start > 0.0) { + double yield_ms = (os::elapsedTime() - _stop_world_start) * 1000.0; + _all_yield_times_ms->add(yield_ms); + } +} + +void G1CollectorPolicy::record_concurrent_pause_end() { +} + +void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() { + _cur_CH_strong_roots_end_sec = os::elapsedTime(); + _cur_CH_strong_roots_dur_ms = + (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0; +} + +void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() { + _cur_G1_strong_roots_end_sec = os::elapsedTime(); + _cur_G1_strong_roots_dur_ms = + (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0; +} + +template +T sum_of(T* sum_arr, int start, int n, int N) { + T sum = (T)0; + for (int i = 0; i < n; i++) { + int j = (start + i) % N; + sum += sum_arr[j]; + } + return sum; +} + +void G1CollectorPolicy::print_par_stats (int level, + const char* str, + double* data, + bool summary) { + double min = data[0], max = data[0]; + double total = 0.0; + int j; + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("[%s (ms):", str); + for (uint i = 0; i < ParallelGCThreads; ++i) { + double val = data[i]; + if (val < min) + min = val; + if (val > max) + max = val; + total += val; + gclog_or_tty->print(" %3.1lf", val); + } + if (summary) { + gclog_or_tty->print_cr(""); + double avg = total / (double) ParallelGCThreads; + gclog_or_tty->print(" "); + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf", + avg, min, max); + } + gclog_or_tty->print_cr("]"); +} + +void G1CollectorPolicy::print_par_buffers (int level, + const char* str, + double* data, + bool summary) { + double min = data[0], max = data[0]; + double total = 0.0; + int j; + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("[%s :", str); + for (uint i = 0; i < ParallelGCThreads; ++i) { + double val = data[i]; + if (val < min) + min = val; + if (val > max) + max = val; + total += 
val; + gclog_or_tty->print(" %d", (int) val); + } + if (summary) { + gclog_or_tty->print_cr(""); + double avg = total / (double) ParallelGCThreads; + gclog_or_tty->print(" "); + for (j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print("Sum: %d, Avg: %d, Min: %d, Max: %d", + (int)total, (int)avg, (int)min, (int)max); + } + gclog_or_tty->print_cr("]"); +} + +void G1CollectorPolicy::print_stats (int level, + const char* str, + double value) { + for (int j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print_cr("[%s: %5.1lf ms]", str, value); +} + +void G1CollectorPolicy::print_stats (int level, + const char* str, + int value) { + for (int j = 0; j < level; ++j) + gclog_or_tty->print(" "); + gclog_or_tty->print_cr("[%s: %d]", str, value); +} + +double G1CollectorPolicy::avg_value (double* data) { + if (ParallelGCThreads > 0) { + double ret = 0.0; + for (uint i = 0; i < ParallelGCThreads; ++i) + ret += data[i]; + return ret / (double) ParallelGCThreads; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::max_value (double* data) { + if (ParallelGCThreads > 0) { + double ret = data[0]; + for (uint i = 1; i < ParallelGCThreads; ++i) + if (data[i] > ret) + ret = data[i]; + return ret; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::sum_of_values (double* data) { + if (ParallelGCThreads > 0) { + double sum = 0.0; + for (uint i = 0; i < ParallelGCThreads; i++) + sum += data[i]; + return sum; + } else { + return data[0]; + } +} + +double G1CollectorPolicy::max_sum (double* data1, + double* data2) { + double ret = data1[0] + data2[0]; + + if (ParallelGCThreads > 0) { + for (uint i = 1; i < ParallelGCThreads; ++i) { + double data = data1[i] + data2[i]; + if (data > ret) + ret = data; + } + } + return ret; +} + +// Anything below that is considered to be zero +#define MIN_TIMER_GRANULARITY 0.0000001 + +void G1CollectorPolicy::record_collection_pause_end(bool popular, + bool abandoned) { + double end_time_sec = os::elapsedTime(); + double elapsed_ms = _last_pause_time_ms; + bool parallel = ParallelGCThreads > 0; + double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0; + size_t rs_size = + _cur_collection_pause_used_regions_at_start - collection_set_size(); + size_t cur_used_bytes = _g1->used(); + assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); + bool last_pause_included_initial_mark = false; + +#ifndef PRODUCT + if (G1YoungSurvRateVerbose) { + gclog_or_tty->print_cr(""); + _short_lived_surv_rate_group->print(); + // do that for any other surv rate groups too + } +#endif // PRODUCT + + checkpoint_conc_overhead(); + + if (in_young_gc_mode()) { + last_pause_included_initial_mark = _should_initiate_conc_mark; + if (last_pause_included_initial_mark) + record_concurrent_mark_init_end_pre(0.0); + + size_t min_used_targ = + (_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta); + + if (cur_used_bytes > min_used_targ) { + if (cur_used_bytes <= _prev_collection_pause_used_at_end_bytes) { + } else if (!_g1->mark_in_progress() && !_last_full_young_gc) { + _should_initiate_conc_mark = true; + } + } + + _prev_collection_pause_used_at_end_bytes = cur_used_bytes; + } + + _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0, + end_time_sec, false); + + guarantee(_cur_collection_pause_used_regions_at_start >= + collection_set_size(), + "Negative RS size?"); + + // This assert is exempted when we're doing parallel collection pauses, + // because the fragmentation caused by the parallel GC allocation 
buffers + // can lead to more memory being used during collection than was used + // before. Best leave this out until the fragmentation problem is fixed. + // Pauses in which evacuation failed can also lead to negative + // collections, since no space is reclaimed from a region containing an + // object whose evacuation failed. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || parallel) // Always using GC LABs now. + || _g1->evacuation_failed() + || _cur_collection_pause_used_at_start_bytes >= cur_used_bytes, + "Negative collection"); + + size_t freed_bytes = + _cur_collection_pause_used_at_start_bytes - cur_used_bytes; + size_t surviving_bytes = _collection_set_bytes_used_before - freed_bytes; + double survival_fraction = + (double)surviving_bytes/ + (double)_collection_set_bytes_used_before; + + _n_pauses++; + + if (!abandoned) { + _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms); + _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms); + _recent_evac_times_ms->add(evac_ms); + _recent_pause_times_ms->add(elapsed_ms); + + _recent_rs_sizes->add(rs_size); + + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. Same with evac + // failure. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05. + assert((true || parallel) + || _g1->evacuation_failed() + || surviving_bytes <= _collection_set_bytes_used_before, + "Or else negative collection!"); + _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before); + _recent_CS_bytes_surviving->add(surviving_bytes); + + // this is where we update the allocation rate of the application + double app_time_ms = + (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms); + if (app_time_ms < MIN_TIMER_GRANULARITY) { + // This usually happens due to the timer not having the required + // granularity. Some Linuxes are the usual culprits. + // We'll just set it to something (arbitrarily) small. 
+ app_time_ms = 1.0; + } + size_t regions_allocated = + (_region_num_young - _prev_region_num_young) + + (_region_num_tenured - _prev_region_num_tenured); + double alloc_rate_ms = (double) regions_allocated / app_time_ms; + _alloc_rate_ms_seq->add(alloc_rate_ms); + _prev_region_num_young = _region_num_young; + _prev_region_num_tenured = _region_num_tenured; + + double interval_ms = + (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0; + update_recent_gc_times(end_time_sec, elapsed_ms); + _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms; + assert(recent_avg_pause_time_ratio() < 1.00, "All GC?"); + } + + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses); + } + + PauseSummary* summary; + if (!abandoned && !popular) + summary = _non_pop_summary; + else if (!abandoned && popular) + summary = _pop_summary; + else if (abandoned && !popular) + summary = _non_pop_abandoned_summary; + else if (abandoned && popular) + summary = _pop_abandoned_summary; + else + guarantee(false, "should not get here!"); + + double pop_update_rs_time; + double pop_update_rs_processed_buffers; + double pop_scan_rs_time; + double pop_closure_app_time; + double pop_other_time; + + if (popular) { + PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); + guarantee(preamble_summary != NULL, "should not be null!"); + + pop_update_rs_time = avg_value(_pop_par_last_update_rs_times_ms); + pop_update_rs_processed_buffers = + sum_of_values(_pop_par_last_update_rs_processed_buffers); + pop_scan_rs_time = avg_value(_pop_par_last_scan_rs_times_ms); + pop_closure_app_time = avg_value(_pop_par_last_closure_app_times_ms); + pop_other_time = _cur_popular_preamble_time_ms - + (pop_update_rs_time + pop_scan_rs_time + pop_closure_app_time + + _cur_popular_evac_time_ms); + + preamble_summary->record_pop_preamble_time_ms(_cur_popular_preamble_time_ms); + preamble_summary->record_pop_update_rs_time_ms(pop_update_rs_time); + preamble_summary->record_pop_scan_rs_time_ms(pop_scan_rs_time); + preamble_summary->record_pop_closure_app_time_ms(pop_closure_app_time); + preamble_summary->record_pop_evacuation_time_ms(_cur_popular_evac_time_ms); + preamble_summary->record_pop_other_time_ms(pop_other_time); + } + + double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms); + double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms); + double scan_only_time = avg_value(_par_last_scan_only_times_ms); + double scan_only_regions_scanned = + sum_of_values(_par_last_scan_only_regions_scanned); + double update_rs_time = avg_value(_par_last_update_rs_times_ms); + double update_rs_processed_buffers = + sum_of_values(_par_last_update_rs_processed_buffers); + double scan_rs_time = avg_value(_par_last_scan_rs_times_ms); + double obj_copy_time = avg_value(_par_last_obj_copy_times_ms); + double termination_time = avg_value(_par_last_termination_times_ms); + + double parallel_other_time; + if (!abandoned) { + MainBodySummary* body_summary = summary->main_body_summary(); + guarantee(body_summary != NULL, "should not be null!"); + + if (_satb_drain_time_set) + body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms); + else + body_summary->record_satb_drain_time_ms(0.0); + body_summary->record_ext_root_scan_time_ms(ext_root_scan_time); + body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time); + body_summary->record_scan_only_time_ms(scan_only_time); + body_summary->record_update_rs_time_ms(update_rs_time); + 
body_summary->record_scan_rs_time_ms(scan_rs_time); + body_summary->record_obj_copy_time_ms(obj_copy_time); + if (parallel) { + body_summary->record_parallel_time_ms(_cur_collection_par_time_ms); + body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms); + body_summary->record_termination_time_ms(termination_time); + parallel_other_time = _cur_collection_par_time_ms - + (update_rs_time + ext_root_scan_time + mark_stack_scan_time + + scan_only_time + scan_rs_time + obj_copy_time + termination_time); + body_summary->record_parallel_other_time_ms(parallel_other_time); + } + body_summary->record_mark_closure_time_ms(_mark_closure_time_ms); + } + + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" ET: %10.6f ms (avg: %10.6f ms)\n" + " CH Strong: %10.6f ms (avg: %10.6f ms)\n" + " G1 Strong: %10.6f ms (avg: %10.6f ms)\n" + " Evac: %10.6f ms (avg: %10.6f ms)\n" + " ET-RS: %10.6f ms (avg: %10.6f ms)\n" + " |RS|: " SIZE_FORMAT, + elapsed_ms, recent_avg_time_for_pauses_ms(), + _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(), + _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(), + evac_ms, recent_avg_time_for_evac_ms(), + scan_rs_time, + recent_avg_time_for_pauses_ms() - + recent_avg_time_for_G1_strong_ms(), + rs_size); + + gclog_or_tty->print_cr(" Used at start: " SIZE_FORMAT"K" + " At end " SIZE_FORMAT "K\n" + " garbage : " SIZE_FORMAT "K" + " of " SIZE_FORMAT "K\n" + " survival : %6.2f%% (%6.2f%% avg)", + _cur_collection_pause_used_at_start_bytes/K, + _g1->used()/K, freed_bytes/K, + _collection_set_bytes_used_before/K, + survival_fraction*100.0, + recent_avg_survival_fraction()*100.0); + gclog_or_tty->print_cr(" Recent %% gc pause time: %6.2f", + recent_avg_pause_time_ratio() * 100.0); + } + + double other_time_ms = elapsed_ms; + if (popular) + other_time_ms -= _cur_popular_preamble_time_ms; + + if (!abandoned) { + if (_satb_drain_time_set) + other_time_ms -= _cur_satb_drain_time_ms; + + if (parallel) + other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms; + else + other_time_ms -= + update_rs_time + + ext_root_scan_time + mark_stack_scan_time + scan_only_time + + scan_rs_time + obj_copy_time; + } + + if (PrintGCDetails) { + gclog_or_tty->print_cr("%s%s, %1.8lf secs]", + (popular && !abandoned) ? " (popular)" : + (!popular && abandoned) ? " (abandoned)" : + (popular && abandoned) ? " (popular/abandoned)" : "", + (last_pause_included_initial_mark) ? 
" (initial-mark)" : "", + elapsed_ms / 1000.0); + + if (!abandoned) { + if (_satb_drain_time_set) + print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); + if (_last_satb_drain_processed_buffers >= 0) + print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); + } + if (popular) + print_stats(1, "Popularity Preamble", _cur_popular_preamble_time_ms); + if (parallel) { + if (popular) { + print_par_stats(2, "Update RS (Start)", _pop_par_last_update_rs_start_times_ms, false); + print_par_stats(2, "Update RS", _pop_par_last_update_rs_times_ms); + if (G1RSBarrierUseQueue) + print_par_buffers(3, "Processed Buffers", + _pop_par_last_update_rs_processed_buffers, true); + print_par_stats(2, "Scan RS", _pop_par_last_scan_rs_times_ms); + print_par_stats(2, "Closure app", _pop_par_last_closure_app_times_ms); + print_stats(2, "Evacuation", _cur_popular_evac_time_ms); + print_stats(2, "Other", pop_other_time); + } + if (!abandoned) { + print_stats(1, "Parallel Time", _cur_collection_par_time_ms); + if (!popular) { + print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); + print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); + if (G1RSBarrierUseQueue) + print_par_buffers(3, "Processed Buffers", + _par_last_update_rs_processed_buffers, true); + } + print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms); + print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms); + print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms); + print_par_buffers(3, "Scan-Only Regions", + _par_last_scan_only_regions_scanned, true); + print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms); + print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms); + print_par_stats(2, "Termination", _par_last_termination_times_ms); + print_stats(2, "Other", parallel_other_time); + print_stats(1, "Clear CT", _cur_clear_ct_time_ms); + } + } else { + if (popular) { + print_stats(2, "Update RS", pop_update_rs_time); + if (G1RSBarrierUseQueue) + print_stats(3, "Processed Buffers", + (int)pop_update_rs_processed_buffers); + print_stats(2, "Scan RS", pop_scan_rs_time); + print_stats(2, "Closure App", pop_closure_app_time); + print_stats(2, "Evacuation", _cur_popular_evac_time_ms); + print_stats(2, "Other", pop_other_time); + } + if (!abandoned) { + if (!popular) { + print_stats(1, "Update RS", update_rs_time); + if (G1RSBarrierUseQueue) + print_stats(2, "Processed Buffers", + (int)update_rs_processed_buffers); + } + print_stats(1, "Ext Root Scanning", ext_root_scan_time); + print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); + print_stats(1, "Scan-Only Scanning", scan_only_time); + print_stats(1, "Scan RS", scan_rs_time); + print_stats(1, "Object Copying", obj_copy_time); + } + } + print_stats(1, "Other", other_time_ms); + for (int i = 0; i < _aux_num; ++i) { + if (_cur_aux_times_set[i]) { + char buffer[96]; + sprintf(buffer, "Aux%d", i); + print_stats(1, buffer, _cur_aux_times_ms[i]); + } + } + } + if (PrintGCDetails) + gclog_or_tty->print(" ["); + if (PrintGC || PrintGCDetails) + _g1->print_size_transition(gclog_or_tty, + _cur_collection_pause_used_at_start_bytes, + _g1->used(), _g1->capacity()); + if (PrintGCDetails) + gclog_or_tty->print_cr("]"); + + _all_pause_times_ms->add(elapsed_ms); + summary->record_total_time_ms(elapsed_ms); + summary->record_other_time_ms(other_time_ms); + for (int i = 0; i < _aux_num; ++i) + if (_cur_aux_times_set[i]) + _all_aux_times_ms[i].add(_cur_aux_times_ms[i]); + + // Reset 
marks-between-pauses counter. + _n_marks_since_last_pause = 0; + + // Update the efficiency-since-mark vars. + double proc_ms = elapsed_ms * (double) _parallel_gc_threads; + if (elapsed_ms < MIN_TIMER_GRANULARITY) { + // This usually happens due to the timer not having the required + // granularity. Some Linuxes are the usual culprits. + // We'll just set it to something (arbitrarily) small. + proc_ms = 1.0; + } + double cur_efficiency = (double) freed_bytes / proc_ms; + + bool new_in_marking_window = _in_marking_window; + bool new_in_marking_window_im = false; + if (_should_initiate_conc_mark) { + new_in_marking_window = true; + new_in_marking_window_im = true; + } + + if (in_young_gc_mode()) { + if (_last_full_young_gc) { + set_full_young_gcs(false); + _last_full_young_gc = false; + } + + if ( !_last_young_gc_full ) { + if ( _should_revert_to_full_young_gcs || + _known_garbage_ratio < 0.05 || + (adaptive_young_list_length() && + (get_gc_eff_factor() * cur_efficiency < predict_young_gc_eff())) ) { + set_full_young_gcs(true); + } + } + _should_revert_to_full_young_gcs = false; + + if (_last_young_gc_full && !_during_marking) + _young_gc_eff_seq->add(cur_efficiency); + } + + _short_lived_surv_rate_group->start_adding_regions(); + // do that for any other surv rate groupsx + + // + + if (!popular && !abandoned) { + double pause_time_ms = elapsed_ms; + + size_t diff = 0; + if (_max_pending_cards >= _pending_cards) + diff = _max_pending_cards - _pending_cards; + _pending_card_diff_seq->add((double) diff); + + double cost_per_card_ms = 0.0; + if (_pending_cards > 0) { + cost_per_card_ms = update_rs_time / (double) _pending_cards; + _cost_per_card_ms_seq->add(cost_per_card_ms); + } + + double cost_per_scan_only_region_ms = 0.0; + if (scan_only_regions_scanned > 0.0) { + cost_per_scan_only_region_ms = + scan_only_time / scan_only_regions_scanned; + if (_in_marking_window_im) + _cost_per_scan_only_region_ms_during_cm_seq->add(cost_per_scan_only_region_ms); + else + _cost_per_scan_only_region_ms_seq->add(cost_per_scan_only_region_ms); + } + + size_t cards_scanned = _g1->cards_scanned(); + + double cost_per_entry_ms = 0.0; + if (cards_scanned > 10) { + cost_per_entry_ms = scan_rs_time / (double) cards_scanned; + if (_last_young_gc_full) + _cost_per_entry_ms_seq->add(cost_per_entry_ms); + else + _partially_young_cost_per_entry_ms_seq->add(cost_per_entry_ms); + } + + if (_max_rs_lengths > 0) { + double cards_per_entry_ratio = + (double) cards_scanned / (double) _max_rs_lengths; + if (_last_young_gc_full) + _fully_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + else + _partially_young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + } + + size_t rs_length_diff = _max_rs_lengths - _recorded_rs_lengths; + if (rs_length_diff >= 0) + _rs_length_diff_seq->add((double) rs_length_diff); + + size_t copied_bytes = surviving_bytes; + double cost_per_byte_ms = 0.0; + if (copied_bytes > 0) { + cost_per_byte_ms = obj_copy_time / (double) copied_bytes; + if (_in_marking_window) + _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); + else + _cost_per_byte_ms_seq->add(cost_per_byte_ms); + } + + double all_other_time_ms = pause_time_ms - + (update_rs_time + scan_only_time + scan_rs_time + obj_copy_time + + _mark_closure_time_ms + termination_time); + + double young_other_time_ms = 0.0; + if (_recorded_young_regions > 0) { + young_other_time_ms = + _recorded_young_cset_choice_time_ms + + _recorded_young_free_cset_time_ms; + _young_other_cost_per_region_ms_seq->add(young_other_time_ms / + 
(double) _recorded_young_regions); + } + double non_young_other_time_ms = 0.0; + if (_recorded_non_young_regions > 0) { + non_young_other_time_ms = + _recorded_non_young_cset_choice_time_ms + + _recorded_non_young_free_cset_time_ms; + + _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms / + (double) _recorded_non_young_regions); + } + + double constant_other_time_ms = all_other_time_ms - + (young_other_time_ms + non_young_other_time_ms); + _constant_other_time_ms_seq->add(constant_other_time_ms); + + double survival_ratio = 0.0; + if (_bytes_in_collection_set_before_gc > 0) { + survival_ratio = (double) bytes_in_to_space_during_gc() / + (double) _bytes_in_collection_set_before_gc; + } + + _pending_cards_seq->add((double) _pending_cards); + _scanned_cards_seq->add((double) cards_scanned); + _rs_lengths_seq->add((double) _max_rs_lengths); + + double expensive_region_limit_ms = + (double) G1MaxPauseTimeMS - predict_constant_other_time_ms(); + if (expensive_region_limit_ms < 0.0) { + // this means that the other time was predicted to be longer than + // than the max pause time + expensive_region_limit_ms = (double) G1MaxPauseTimeMS; + } + _expensive_region_limit_ms = expensive_region_limit_ms; + + if (PREDICTIONS_VERBOSE) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d " + "REGIONS %d %d %d %d " + "PENDING_CARDS %d %d " + "CARDS_SCANNED %d %d " + "RS_LENGTHS %d %d " + "SCAN_ONLY_SCAN %1.6lf %1.6lf " + "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf " + "SURVIVAL_RATIO %1.6lf %1.6lf " + "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf " + "OTHER_YOUNG %1.6lf %1.6lf " + "OTHER_NON_YOUNG %1.6lf %1.6lf " + "VTIME_DIFF %1.6lf TERMINATION %1.6lf " + "ELAPSED %1.6lf %1.6lf ", + _cur_collection_start_sec, + (!_last_young_gc_full) ? 2 : + (last_pause_included_initial_mark) ? 1 : 0, + _recorded_region_num, + _recorded_young_regions, + _recorded_scan_only_regions, + _recorded_non_young_regions, + _predicted_pending_cards, _pending_cards, + _predicted_cards_scanned, cards_scanned, + _predicted_rs_lengths, _max_rs_lengths, + _predicted_scan_only_scan_time_ms, scan_only_time, + _predicted_rs_update_time_ms, update_rs_time, + _predicted_rs_scan_time_ms, scan_rs_time, + _predicted_survival_ratio, survival_ratio, + _predicted_object_copy_time_ms, obj_copy_time, + _predicted_constant_other_time_ms, constant_other_time_ms, + _predicted_young_other_time_ms, young_other_time_ms, + _predicted_non_young_other_time_ms, + non_young_other_time_ms, + _vtime_diff_ms, termination_time, + _predicted_pause_time_ms, elapsed_ms); + } + + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms", + _predicted_pause_time_ms, + (_within_target) ? 
"within" : "outside", + elapsed_ms); + } + + } + + _in_marking_window = new_in_marking_window; + _in_marking_window_im = new_in_marking_window_im; + _free_regions_at_end_of_collection = _g1->free_regions(); + _scan_only_regions_at_end_of_collection = _g1->young_list_length(); + calculate_young_list_min_length(); + calculate_young_list_target_config(); + + // + + _target_pause_time_ms = -1.0; + + // TODO: calculate tenuring threshold + _tenuring_threshold = MaxTenuringThreshold; +} + +// + +double +G1CollectorPolicy:: +predict_young_collection_elapsed_time_ms(size_t adjustment) { + guarantee( adjustment == 0 || adjustment == 1, "invariant" ); + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t young_num = g1h->young_list_length(); + if (young_num == 0) + return 0.0; + + young_num += adjustment; + size_t pending_cards = predict_pending_cards(); + size_t rs_lengths = g1h->young_list_sampled_rs_lengths() + + predict_rs_length_diff(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_lengths); + else + card_num = predict_non_young_card_num(rs_lengths); + size_t young_byte_size = young_num * HeapRegion::GrainBytes; + double accum_yg_surv_rate = + _short_lived_surv_rate_group->accum_surv_rate(adjustment); + + size_t bytes_to_copy = + (size_t) (accum_yg_surv_rate * (double) HeapRegion::GrainBytes); + + return + predict_rs_update_time_ms(pending_cards) + + predict_rs_scan_time_ms(card_num) + + predict_object_copy_time_ms(bytes_to_copy) + + predict_young_other_time_ms(young_num) + + predict_constant_other_time_ms(); +} + +double +G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) { + size_t rs_length = predict_rs_length_diff(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_length); + else + card_num = predict_non_young_card_num(rs_length); + return predict_base_elapsed_time_ms(pending_cards, card_num); +} + +double +G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards, + size_t scanned_cards) { + return + predict_rs_update_time_ms(pending_cards) + + predict_rs_scan_time_ms(scanned_cards) + + predict_constant_other_time_ms(); +} + +double +G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr, + bool young) { + size_t rs_length = hr->rem_set()->occupied(); + size_t card_num; + if (full_young_gcs()) + card_num = predict_young_card_num(rs_length); + else + card_num = predict_non_young_card_num(rs_length); + size_t bytes_to_copy = predict_bytes_to_copy(hr); + + double region_elapsed_time_ms = + predict_rs_scan_time_ms(card_num) + + predict_object_copy_time_ms(bytes_to_copy); + + if (young) + region_elapsed_time_ms += predict_young_other_time_ms(1); + else + region_elapsed_time_ms += predict_non_young_other_time_ms(1); + + return region_elapsed_time_ms; +} + +size_t +G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) { + size_t bytes_to_copy; + if (hr->is_marked()) + bytes_to_copy = hr->max_live_bytes(); + else { + guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1, + "invariant" ); + int age = hr->age_in_surv_rate_group(); + double yg_surv_rate = predict_yg_surv_rate(age); + bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate); + } + + return bytes_to_copy; +} + +void +G1CollectorPolicy::start_recording_regions() { + _recorded_rs_lengths = 0; + _recorded_scan_only_regions = 0; + _recorded_young_regions = 0; + _recorded_non_young_regions = 0; + +#if PREDICTIONS_VERBOSE + _predicted_rs_lengths = 0; + _predicted_cards_scanned = 0; + + _recorded_marked_bytes 
= 0; + _recorded_young_bytes = 0; + _predicted_bytes_to_copy = 0; +#endif // PREDICTIONS_VERBOSE +} + +void +G1CollectorPolicy::record_cset_region(HeapRegion* hr, bool young) { + if (young) { + ++_recorded_young_regions; + } else { + ++_recorded_non_young_regions; + } +#if PREDICTIONS_VERBOSE + if (young) { + _recorded_young_bytes += hr->asSpace()->used(); + } else { + _recorded_marked_bytes += hr->max_live_bytes(); + } + _predicted_bytes_to_copy += predict_bytes_to_copy(hr); +#endif // PREDICTIONS_VERBOSE + + size_t rs_length = hr->rem_set()->occupied(); + _recorded_rs_lengths += rs_length; +} + +void +G1CollectorPolicy::record_scan_only_regions(size_t scan_only_length) { + _recorded_scan_only_regions = scan_only_length; +} + +void +G1CollectorPolicy::end_recording_regions() { +#if PREDICTIONS_VERBOSE + _predicted_pending_cards = predict_pending_cards(); + _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff(); + if (full_young_gcs()) + _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths); + else + _predicted_cards_scanned += + predict_non_young_card_num(_predicted_rs_lengths); + _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions; + + _predicted_young_survival_ratio = 0.0; + for (int i = 0; i < _recorded_young_regions; ++i) + _predicted_young_survival_ratio += predict_yg_surv_rate(i); + _predicted_young_survival_ratio /= (double) _recorded_young_regions; + + _predicted_scan_only_scan_time_ms = + predict_scan_only_time_ms(_recorded_scan_only_regions); + _predicted_rs_update_time_ms = + predict_rs_update_time_ms(_g1->pending_card_num()); + _predicted_rs_scan_time_ms = + predict_rs_scan_time_ms(_predicted_cards_scanned); + _predicted_object_copy_time_ms = + predict_object_copy_time_ms(_predicted_bytes_to_copy); + _predicted_constant_other_time_ms = + predict_constant_other_time_ms(); + _predicted_young_other_time_ms = + predict_young_other_time_ms(_recorded_young_regions); + _predicted_non_young_other_time_ms = + predict_non_young_other_time_ms(_recorded_non_young_regions); + + _predicted_pause_time_ms = + _predicted_scan_only_scan_time_ms + + _predicted_rs_update_time_ms + + _predicted_rs_scan_time_ms + + _predicted_object_copy_time_ms + + _predicted_constant_other_time_ms + + _predicted_young_other_time_ms + + _predicted_non_young_other_time_ms; +#endif // PREDICTIONS_VERBOSE +} + +void G1CollectorPolicy::check_if_region_is_too_expensive(double + predicted_time_ms) { + // I don't think we need to do this when in young GC mode since + // marking will be initiated next time we hit the soft limit anyway... 
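+  //
+  // If a single region's predicted time is already over
+  // _expensive_region_limit_ms (which is derived from the pause target),
+  // then: outside young GC mode we switch to fully-young GCs and request a
+  // concurrent mark; in young GC mode we simply arrange to revert to
+  // fully-young GCs, as another partial collection is not worth it.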
+ if (predicted_time_ms > _expensive_region_limit_ms) { + if (!in_young_gc_mode()) { + set_full_young_gcs(true); + _should_initiate_conc_mark = true; + } else + // no point in doing another partial one + _should_revert_to_full_young_gcs = true; + } +} + +// + + +void G1CollectorPolicy::update_recent_gc_times(double end_time_sec, + double elapsed_ms) { + _recent_gc_times_ms->add(elapsed_ms); + _recent_prev_end_times_for_all_gcs_sec->add(end_time_sec); + _prev_collection_pause_end_ms = end_time_sec * 1000.0; +} + +double G1CollectorPolicy::recent_avg_time_for_pauses_ms() { + if (_recent_pause_times_ms->num() == 0) return (double) G1MaxPauseTimeMS; + else return _recent_pause_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() { + if (_recent_CH_strong_roots_times_ms->num() == 0) + return (double)G1MaxPauseTimeMS/3.0; + else return _recent_CH_strong_roots_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() { + if (_recent_G1_strong_roots_times_ms->num() == 0) + return (double)G1MaxPauseTimeMS/3.0; + else return _recent_G1_strong_roots_times_ms->avg(); +} + +double G1CollectorPolicy::recent_avg_time_for_evac_ms() { + if (_recent_evac_times_ms->num() == 0) return (double)G1MaxPauseTimeMS/3.0; + else return _recent_evac_times_ms->avg(); +} + +int G1CollectorPolicy::number_of_recent_gcs() { + assert(_recent_CH_strong_roots_times_ms->num() == + _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync"); + assert(_recent_G1_strong_roots_times_ms->num() == + _recent_evac_times_ms->num(), "Sequence out of sync"); + assert(_recent_evac_times_ms->num() == + _recent_pause_times_ms->num(), "Sequence out of sync"); + assert(_recent_pause_times_ms->num() == + _recent_CS_bytes_used_before->num(), "Sequence out of sync"); + assert(_recent_CS_bytes_used_before->num() == + _recent_CS_bytes_surviving->num(), "Sequence out of sync"); + return _recent_pause_times_ms->num(); +} + +double G1CollectorPolicy::recent_avg_survival_fraction() { + return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving, + _recent_CS_bytes_used_before); +} + +double G1CollectorPolicy::last_survival_fraction() { + return last_survival_fraction_work(_recent_CS_bytes_surviving, + _recent_CS_bytes_used_before); +} + +double +G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before) { + assert(surviving->num() == before->num(), "Sequence out of sync"); + if (before->sum() > 0.0) { + double recent_survival_rate = surviving->sum() / before->sum(); + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. + // Further, we're now always doing parallel collection. But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || ParallelGCThreads > 0) || + _g1->evacuation_failed() || + recent_survival_rate <= 1.0, "Or bad frac"); + return recent_survival_rate; + } else { + return 1.0; // Be conservative. + } +} + +double +G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before) { + assert(surviving->num() == before->num(), "Sequence out of sync"); + if (surviving->num() > 0 && before->last() > 0.0) { + double last_survival_rate = surviving->last() / before->last(); + // We exempt parallel collection from this check because Alloc Buffer + // fragmentation can produce negative collections. + // Further, we're now always doing parallel collection. 
But I'm still + // leaving this here as a placeholder for a more precise assertion later. + // (DLD, 10/05.) + assert((true || ParallelGCThreads > 0) || + last_survival_rate <= 1.0, "Or bad frac"); + return last_survival_rate; + } else { + return 1.0; + } +} + +static const int survival_min_obs = 5; +static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 }; +static const double min_survival_rate = 0.1; + +double +G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg, + double latest) { + double res = avg; + if (number_of_recent_gcs() < survival_min_obs) { + res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]); + } + res = MAX2(res, latest); + res = MAX2(res, min_survival_rate); + // In the parallel case, LAB fragmentation can produce "negative + // collections"; so can evac failure. Cap at 1.0 + res = MIN2(res, 1.0); + return res; +} + +size_t G1CollectorPolicy::expansion_amount() { + if ((int)(recent_avg_pause_time_ratio() * 100.0) > G1GCPct) { + // We will double the existing space, or take G1ExpandByPctOfAvail % of + // the available expansion space, whichever is smaller, bounded below + // by a minimum expansion (unless that's all that's left.) + const size_t min_expand_bytes = 1*M; + size_t reserved_bytes = _g1->g1_reserved_obj_bytes(); + size_t committed_bytes = _g1->capacity(); + size_t uncommitted_bytes = reserved_bytes - committed_bytes; + size_t expand_bytes; + size_t expand_bytes_via_pct = + uncommitted_bytes * G1ExpandByPctOfAvail / 100; + expand_bytes = MIN2(expand_bytes_via_pct, committed_bytes); + expand_bytes = MAX2(expand_bytes, min_expand_bytes); + expand_bytes = MIN2(expand_bytes, uncommitted_bytes); + if (G1PolicyVerbose > 1) { + gclog_or_tty->print("Decided to expand: ratio = %5.2f, " + "committed = %d%s, uncommited = %d%s, via pct = %d%s.\n" + " Answer = %d.\n", + recent_avg_pause_time_ratio(), + byte_size_in_proper_unit(committed_bytes), + proper_unit_for_byte_size(committed_bytes), + byte_size_in_proper_unit(uncommitted_bytes), + proper_unit_for_byte_size(uncommitted_bytes), + byte_size_in_proper_unit(expand_bytes_via_pct), + proper_unit_for_byte_size(expand_bytes_via_pct), + byte_size_in_proper_unit(expand_bytes), + proper_unit_for_byte_size(expand_bytes)); + } + return expand_bytes; + } else { + return 0; + } +} + +void G1CollectorPolicy::note_start_of_mark_thread() { + _mark_thread_startup_sec = os::elapsedTime(); +} + +class CountCSClosure: public HeapRegionClosure { + G1CollectorPolicy* _g1_policy; +public: + CountCSClosure(G1CollectorPolicy* g1_policy) : + _g1_policy(g1_policy) {} + bool doHeapRegion(HeapRegion* r) { + _g1_policy->_bytes_in_collection_set_before_gc += r->used(); + return false; + } +}; + +void G1CollectorPolicy::count_CS_bytes_used() { + CountCSClosure cs_closure(this); + _g1->collection_set_iterate(&cs_closure); +} + +static void print_indent(int level) { + for (int j = 0; j < level+1; ++j) + gclog_or_tty->print(" "); +} + +void G1CollectorPolicy::print_summary (int level, + const char* str, + NumberSeq* seq) const { + double sum = seq->sum(); + print_indent(level); + gclog_or_tty->print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)", + str, sum / 1000.0, seq->avg()); +} + +void G1CollectorPolicy::print_summary_sd (int level, + const char* str, + NumberSeq* seq) const { + print_summary(level, str, seq); + print_indent(level + 5); + gclog_or_tty->print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)", + seq->num(), seq->sd(), seq->maximum()); +} + +void G1CollectorPolicy::check_other_times(int level, + 
NumberSeq* other_times_ms, + NumberSeq* calc_other_times_ms) const { + bool should_print = false; + + double max_sum = MAX2(fabs(other_times_ms->sum()), + fabs(calc_other_times_ms->sum())); + double min_sum = MIN2(fabs(other_times_ms->sum()), + fabs(calc_other_times_ms->sum())); + double sum_ratio = max_sum / min_sum; + if (sum_ratio > 1.1) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER SUM DOESN'T MATCH RECORDED ###"); + } + + double max_avg = MAX2(fabs(other_times_ms->avg()), + fabs(calc_other_times_ms->avg())); + double min_avg = MIN2(fabs(other_times_ms->avg()), + fabs(calc_other_times_ms->avg())); + double avg_ratio = max_avg / min_avg; + if (avg_ratio > 1.1) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER AVG DOESN'T MATCH RECORDED ###"); + } + + if (other_times_ms->sum() < -0.01) { + print_indent(level + 1); + gclog_or_tty->print_cr("## RECORDED OTHER SUM IS NEGATIVE ###"); + } + + if (other_times_ms->avg() < -0.01) { + print_indent(level + 1); + gclog_or_tty->print_cr("## RECORDED OTHER AVG IS NEGATIVE ###"); + } + + if (calc_other_times_ms->sum() < -0.01) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER SUM IS NEGATIVE ###"); + } + + if (calc_other_times_ms->avg() < -0.01) { + should_print = true; + print_indent(level + 1); + gclog_or_tty->print_cr("## CALCULATED OTHER AVG IS NEGATIVE ###"); + } + + if (should_print) + print_summary(level, "Other(Calc)", calc_other_times_ms); +} + +void G1CollectorPolicy::print_summary(PauseSummary* summary) const { + bool parallel = ParallelGCThreads > 0; + MainBodySummary* body_summary = summary->main_body_summary(); + PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); + + if (summary->get_total_seq()->num() > 0) { + print_summary_sd(0, + (preamble_summary == NULL) ? 
"Non-Popular Pauses" : + "Popular Pauses", + summary->get_total_seq()); + if (preamble_summary != NULL) { + print_summary(1, "Popularity Preamble", + preamble_summary->get_pop_preamble_seq()); + print_summary(2, "Update RS", preamble_summary->get_pop_update_rs_seq()); + print_summary(2, "Scan RS", preamble_summary->get_pop_scan_rs_seq()); + print_summary(2, "Closure App", + preamble_summary->get_pop_closure_app_seq()); + print_summary(2, "Evacuation", + preamble_summary->get_pop_evacuation_seq()); + print_summary(2, "Other", preamble_summary->get_pop_other_seq()); + { + NumberSeq* other_parts[] = { + preamble_summary->get_pop_update_rs_seq(), + preamble_summary->get_pop_scan_rs_seq(), + preamble_summary->get_pop_closure_app_seq(), + preamble_summary->get_pop_evacuation_seq() + }; + NumberSeq calc_other_times_ms(preamble_summary->get_pop_preamble_seq(), + 4, other_parts); + check_other_times(2, preamble_summary->get_pop_other_seq(), + &calc_other_times_ms); + } + } + if (body_summary != NULL) { + print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq()); + if (parallel) { + print_summary(1, "Parallel Time", body_summary->get_parallel_seq()); + print_summary(2, "Update RS", body_summary->get_update_rs_seq()); + print_summary(2, "Ext Root Scanning", + body_summary->get_ext_root_scan_seq()); + print_summary(2, "Mark Stack Scanning", + body_summary->get_mark_stack_scan_seq()); + print_summary(2, "Scan-Only Scanning", + body_summary->get_scan_only_seq()); + print_summary(2, "Scan RS", body_summary->get_scan_rs_seq()); + print_summary(2, "Object Copy", body_summary->get_obj_copy_seq()); + print_summary(2, "Termination", body_summary->get_termination_seq()); + print_summary(2, "Other", body_summary->get_parallel_other_seq()); + { + NumberSeq* other_parts[] = { + body_summary->get_update_rs_seq(), + body_summary->get_ext_root_scan_seq(), + body_summary->get_mark_stack_scan_seq(), + body_summary->get_scan_only_seq(), + body_summary->get_scan_rs_seq(), + body_summary->get_obj_copy_seq(), + body_summary->get_termination_seq() + }; + NumberSeq calc_other_times_ms(body_summary->get_parallel_seq(), + 7, other_parts); + check_other_times(2, body_summary->get_parallel_other_seq(), + &calc_other_times_ms); + } + print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq()); + print_summary(1, "Clear CT", body_summary->get_clear_ct_seq()); + } else { + print_summary(1, "Update RS", body_summary->get_update_rs_seq()); + print_summary(1, "Ext Root Scanning", + body_summary->get_ext_root_scan_seq()); + print_summary(1, "Mark Stack Scanning", + body_summary->get_mark_stack_scan_seq()); + print_summary(1, "Scan-Only Scanning", + body_summary->get_scan_only_seq()); + print_summary(1, "Scan RS", body_summary->get_scan_rs_seq()); + print_summary(1, "Object Copy", body_summary->get_obj_copy_seq()); + } + } + print_summary(1, "Other", summary->get_other_seq()); + { + NumberSeq calc_other_times_ms; + if (body_summary != NULL) { + // not abandoned + if (parallel) { + // parallel + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + (preamble_summary == NULL) ? NULL : + preamble_summary->get_pop_preamble_seq(), + body_summary->get_parallel_seq(), + body_summary->get_clear_ct_seq() + }; + calc_other_times_ms = NumberSeq (summary->get_total_seq(), + 4, other_parts); + } else { + // serial + NumberSeq* other_parts[] = { + body_summary->get_satb_drain_seq(), + (preamble_summary == NULL) ? 
NULL : + preamble_summary->get_pop_preamble_seq(), + body_summary->get_update_rs_seq(), + body_summary->get_ext_root_scan_seq(), + body_summary->get_mark_stack_scan_seq(), + body_summary->get_scan_only_seq(), + body_summary->get_scan_rs_seq(), + body_summary->get_obj_copy_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 8, other_parts); + } + } else { + // abandoned + NumberSeq* other_parts[] = { + (preamble_summary == NULL) ? NULL : + preamble_summary->get_pop_preamble_seq() + }; + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 1, other_parts); + } + check_other_times(1, summary->get_other_seq(), &calc_other_times_ms); + } + } else { + print_indent(0); + gclog_or_tty->print_cr("none"); + } + gclog_or_tty->print_cr(""); +} + +void +G1CollectorPolicy::print_abandoned_summary(PauseSummary* non_pop_summary, + PauseSummary* pop_summary) const { + bool printed = false; + if (non_pop_summary->get_total_seq()->num() > 0) { + printed = true; + print_summary(non_pop_summary); + } + if (pop_summary->get_total_seq()->num() > 0) { + printed = true; + print_summary(pop_summary); + } + + if (!printed) { + print_indent(0); + gclog_or_tty->print_cr("none"); + gclog_or_tty->print_cr(""); + } +} + +void G1CollectorPolicy::print_tracing_info() const { + if (TraceGen0Time) { + gclog_or_tty->print_cr("ALL PAUSES"); + print_summary_sd(0, "Total", _all_pause_times_ms); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr(" Full Young GC Pauses: %8d", _full_young_pause_num); + gclog_or_tty->print_cr(" Partial Young GC Pauses: %8d", _partial_young_pause_num); + gclog_or_tty->print_cr(""); + + gclog_or_tty->print_cr("NON-POPULAR PAUSES"); + print_summary(_non_pop_summary); + + gclog_or_tty->print_cr("POPULAR PAUSES"); + print_summary(_pop_summary); + + gclog_or_tty->print_cr("ABANDONED PAUSES"); + print_abandoned_summary(_non_pop_abandoned_summary, + _pop_abandoned_summary); + + gclog_or_tty->print_cr("MISC"); + print_summary_sd(0, "Stop World", _all_stop_world_times_ms); + print_summary_sd(0, "Yields", _all_yield_times_ms); + for (int i = 0; i < _aux_num; ++i) { + if (_all_aux_times_ms[i].num() > 0) { + char buffer[96]; + sprintf(buffer, "Aux%d", i); + print_summary_sd(0, buffer, &_all_aux_times_ms[i]); + } + } + + size_t all_region_num = _region_num_young + _region_num_tenured; + gclog_or_tty->print_cr(" New Regions %8d, Young %8d (%6.2lf%%), " + "Tenured %8d (%6.2lf%%)", + all_region_num, + _region_num_young, + (double) _region_num_young / (double) all_region_num * 100.0, + _region_num_tenured, + (double) _region_num_tenured / (double) all_region_num * 100.0); + + if (!G1RSBarrierUseQueue) { + gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) " + "did zero traversals.", + _conc_refine_enabled, _conc_refine_zero_traversals, + _conc_refine_enabled > 0 ? + 100.0 * (float)_conc_refine_zero_traversals/ + (float)_conc_refine_enabled : 0.0); + gclog_or_tty->print_cr(" Max # of traversals = %d.", + _conc_refine_max_traversals); + gclog_or_tty->print_cr(""); + } + } + if (TraceGen1Time) { + if (_all_full_gc_times_ms->num() > 0) { + gclog_or_tty->print("\n%4d full_gcs: total time = %8.2f s", + _all_full_gc_times_ms->num(), + _all_full_gc_times_ms->sum() / 1000.0); + gclog_or_tty->print_cr(" (avg = %8.2fms).", _all_full_gc_times_ms->avg()); + gclog_or_tty->print_cr(" [std. 
dev = %8.2f ms, max = %8.2f ms]", + _all_full_gc_times_ms->sd(), + _all_full_gc_times_ms->maximum()); + } + } +} + +void G1CollectorPolicy::print_yg_surv_rate_info() const { +#ifndef PRODUCT + _short_lived_surv_rate_group->print_surv_rate_summary(); + // add this call for any other surv rate groups +#endif // PRODUCT +} + +void G1CollectorPolicy::update_conc_refine_data() { + unsigned traversals = _g1->concurrent_g1_refine()->disable(); + if (traversals == 0) _conc_refine_zero_traversals++; + _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals, + (size_t)traversals); + + if (G1PolicyVerbose > 1) + gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals); + double multiplier = 1.0; + if (traversals == 0) { + multiplier = 4.0; + } else if (traversals > (size_t)G1ConcRefineTargTraversals) { + multiplier = 1.0/1.5; + } else if (traversals < (size_t)G1ConcRefineTargTraversals) { + multiplier = 1.5; + } + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr(" Multiplier = %7.2f.", multiplier); + gclog_or_tty->print(" Delta went from %d regions to ", + _conc_refine_current_delta); + } + _conc_refine_current_delta = + MIN2(_g1->n_regions(), + (size_t)(_conc_refine_current_delta * multiplier)); + _conc_refine_current_delta = + MAX2(_conc_refine_current_delta, (size_t)1); + if (G1PolicyVerbose > 1) { + gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta); + } + _conc_refine_enabled++; +} + +void G1CollectorPolicy::set_single_region_collection_set(HeapRegion* hr) { + assert(collection_set() == NULL, "Must be no current CS."); + _collection_set_size = 0; + _collection_set_bytes_used_before = 0; + add_to_collection_set(hr); + count_CS_bytes_used(); +} + +bool +G1CollectorPolicy::should_add_next_region_to_young_list() { + assert(in_young_gc_mode(), "should be in young GC mode"); + bool ret; + size_t young_list_length = _g1->young_list_length(); + + if (young_list_length < _young_list_target_length) { + ret = true; + ++_region_num_young; + } else { + ret = false; + ++_region_num_tenured; + } + + return ret; +} + +#ifndef PRODUCT +// for debugging, bit of a hack... +static char* +region_num_to_mbs(int length) { + static char buffer[64]; + double bytes = (double) (length * HeapRegion::GrainBytes); + double mbs = bytes / (double) (1024 * 1024); + sprintf(buffer, "%7.2lfMB", mbs); + return buffer; +} +#endif // PRODUCT + +void +G1CollectorPolicy::checkpoint_conc_overhead() { + double conc_overhead = 0.0; + if (G1AccountConcurrentOverhead) + conc_overhead = COTracker::totalPredConcOverhead(); + _mmu_tracker->update_conc_overhead(conc_overhead); +#if 0 + gclog_or_tty->print(" CO %1.4lf TARGET %1.4lf", + conc_overhead, _mmu_tracker->max_gc_time()); +#endif +} + + +uint G1CollectorPolicy::max_regions(int purpose) { + switch (purpose) { + case GCAllocForSurvived: + return G1MaxSurvivorRegions; + case GCAllocForTenured: + return UINT_MAX; + default: + return UINT_MAX; + }; +} + +void +G1CollectorPolicy_BestRegionsFirst:: +set_single_region_collection_set(HeapRegion* hr) { + G1CollectorPolicy::set_single_region_collection_set(hr); + _collectionSetChooser->removeRegion(hr); +} + + +bool +G1CollectorPolicy_BestRegionsFirst::should_do_collection_pause(size_t + word_size) { + assert(_g1->regions_accounted_for(), "Region leakage!"); + // Initiate a pause when we reach the steady-state "used" target. 
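
update_conc_refine_data above steers the concurrent-refinement delta multiplicatively: quadruple it when a refinement series managed no traversals, shrink it by 1.5 when it exceeded the target traversal count, grow it by 1.5 when it fell short, and clamp the result to [1, n_regions]. A compact, self-contained restatement of that one control step (the target and region count in main() are arbitrary):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // One adjustment step for the refinement delta; 'target' stands in for
    // G1ConcRefineTargTraversals.
    std::size_t adjust_delta(std::size_t delta, std::size_t traversals,
                             std::size_t target, std::size_t n_regions) {
      double multiplier = 1.0;
      if (traversals == 0)          multiplier = 4.0;        // did no work: grow aggressively
      else if (traversals > target) multiplier = 1.0 / 1.5;  // overshot: shrink
      else if (traversals < target) multiplier = 1.5;        // undershot: grow gently
      std::size_t next = (std::size_t)(delta * multiplier);
      next = std::min(next, n_regions);
      return std::max(next, (std::size_t)1);
    }

    int main() {
      std::size_t delta = 16;
      std::size_t traversal_samples[] = {0, 3, 12, 5};
      for (std::size_t t : traversal_samples) {
        delta = adjust_delta(delta, t, /*target=*/5, /*n_regions=*/1024);
        std::printf("traversals=%zu -> delta=%zu\n", t, delta);
      }
      return 0;
    }
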
+ size_t used_hard = (_g1->capacity() / 100) * G1SteadyStateUsed; + size_t used_soft = + MAX2((_g1->capacity() / 100) * (G1SteadyStateUsed - G1SteadyStateUsedDelta), + used_hard/2); + size_t used = _g1->used(); + + double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + + size_t young_list_length = _g1->young_list_length(); + bool reached_target_length = young_list_length >= _young_list_target_length; + + if (in_young_gc_mode()) { + if (reached_target_length) { + assert( young_list_length > 0 && _g1->young_list_length() > 0, + "invariant" ); + _target_pause_time_ms = max_pause_time_ms; + return true; + } + } else { + guarantee( false, "should not reach here" ); + } + + return false; +} + +#ifndef PRODUCT +class HRSortIndexIsOKClosure: public HeapRegionClosure { + CollectionSetChooser* _chooser; +public: + HRSortIndexIsOKClosure(CollectionSetChooser* chooser) : + _chooser(chooser) {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + assert(_chooser->regionProperlyOrdered(r), "Ought to be."); + } + return false; + } +}; + +bool G1CollectorPolicy_BestRegionsFirst::assertMarkedBytesDataOK() { + HRSortIndexIsOKClosure cl(_collectionSetChooser); + _g1->heap_region_iterate(&cl); + return true; +} +#endif + +void +G1CollectorPolicy_BestRegionsFirst:: +record_collection_pause_start(double start_time_sec, size_t start_used) { + G1CollectorPolicy::record_collection_pause_start(start_time_sec, start_used); +} + +class NextNonCSElemFinder: public HeapRegionClosure { + HeapRegion* _res; +public: + NextNonCSElemFinder(): _res(NULL) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set()) { + _res = r; + return true; + } else { + return false; + } + } + HeapRegion* res() { return _res; } +}; + +class KnownGarbageClosure: public HeapRegionClosure { + CollectionSetChooser* _hrSorted; + +public: + KnownGarbageClosure(CollectionSetChooser* hrSorted) : + _hrSorted(hrSorted) + {} + + bool doHeapRegion(HeapRegion* r) { + // We only include humongous regions in collection + // sets when concurrent mark shows that their contained object is + // unreachable. + + // Do we have any marking information for this region? + if (r->is_marked()) { + // We don't include humongous regions in collection + // sets because we collect them immediately at the end of a marking + // cycle. We also don't include young regions because we *must* + // include them in the next collection pause. 
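
The used_hard / used_soft computation above is plain percentage-of-capacity arithmetic. A worked example with assumed values for the G1SteadyStateUsed and G1SteadyStateUsedDelta flags (their real defaults may differ):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t M = 1024 * 1024;
      std::size_t capacity = 1024 * M;          // committed heap: 1 GB (assumed)
      unsigned steady_state_used = 50;          // assumed G1SteadyStateUsed (%)
      unsigned steady_state_used_delta = 10;    // assumed G1SteadyStateUsedDelta (%)

      std::size_t used_hard = (capacity / 100) * steady_state_used;
      std::size_t used_soft = std::max((capacity / 100) *
                                       (steady_state_used - steady_state_used_delta),
                                       used_hard / 2);
      // Roughly 50% and 40% of the committed heap with these settings.
      std::printf("used_hard = %zu MB, used_soft = %zu MB\n",
                  used_hard / M, used_soft / M);
      return 0;
    }
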
+ if (!r->isHumongous() && !r->is_young()) { + _hrSorted->addMarkedHeapRegion(r); + } + } + return false; + } +}; + +class ParKnownGarbageHRClosure: public HeapRegionClosure { + CollectionSetChooser* _hrSorted; + jint _marked_regions_added; + jint _chunk_size; + jint _cur_chunk_idx; + jint _cur_chunk_end; // Cur chunk [_cur_chunk_idx, _cur_chunk_end) + int _worker; + int _invokes; + + void get_new_chunk() { + _cur_chunk_idx = _hrSorted->getParMarkedHeapRegionChunk(_chunk_size); + _cur_chunk_end = _cur_chunk_idx + _chunk_size; + } + void add_region(HeapRegion* r) { + if (_cur_chunk_idx == _cur_chunk_end) { + get_new_chunk(); + } + assert(_cur_chunk_idx < _cur_chunk_end, "postcondition"); + _hrSorted->setMarkedHeapRegion(_cur_chunk_idx, r); + _marked_regions_added++; + _cur_chunk_idx++; + } + +public: + ParKnownGarbageHRClosure(CollectionSetChooser* hrSorted, + jint chunk_size, + int worker) : + _hrSorted(hrSorted), _chunk_size(chunk_size), _worker(worker), + _marked_regions_added(0), _cur_chunk_idx(0), _cur_chunk_end(0), + _invokes(0) + {} + + bool doHeapRegion(HeapRegion* r) { + // We only include humongous regions in collection + // sets when concurrent mark shows that their contained object is + // unreachable. + _invokes++; + + // Do we have any marking information for this region? + if (r->is_marked()) { + // We don't include humongous regions in collection + // sets because we collect them immediately at the end of a marking + // cycle. + // We also do not include young regions in collection sets + if (!r->isHumongous() && !r->is_young()) { + add_region(r); + } + } + return false; + } + jint marked_regions_added() { return _marked_regions_added; } + int invokes() { return _invokes; } +}; + +class ParKnownGarbageTask: public AbstractGangTask { + CollectionSetChooser* _hrSorted; + jint _chunk_size; + G1CollectedHeap* _g1; +public: + ParKnownGarbageTask(CollectionSetChooser* hrSorted, jint chunk_size) : + AbstractGangTask("ParKnownGarbageTask"), + _hrSorted(hrSorted), _chunk_size(chunk_size), + _g1(G1CollectedHeap::heap()) + {} + + void work(int i) { + ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i); + // Back to zero for the claim value. 
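
ParKnownGarbageHRClosure above avoids per-region synchronization by having each worker claim chunks of _chunk_size consecutive slots in the chooser's pre-sized array (via getParMarkedHeapRegionChunk) and then fill its chunk privately. A minimal sketch of that claiming idiom, with a plain atomic counter standing in for the CollectionSetChooser; this illustrates the pattern, it is not the HotSpot code:

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    std::atomic<int> next_free{0};            // next unclaimed slot in the shared array
    std::vector<int> shared_slots(1000, -1);  // pre-sized destination array

    // Claim 'chunk_size' consecutive slots; returns the first index of the chunk.
    int claim_chunk(int chunk_size) {
      return next_free.fetch_add(chunk_size);
    }

    void worker(int id, int chunk_size, int items_to_add) {
      int idx = 0, end = 0;
      for (int n = 0; n < items_to_add; n++) {
        if (idx == end) {                     // current chunk exhausted: claim another
          idx = claim_chunk(chunk_size);
          end = idx + chunk_size;
        }
        shared_slots[idx++] = id;             // "add a region" into a privately owned slot
      }
    }

    int main() {
      std::vector<std::thread> pool;
      for (int i = 0; i < 4; i++)
        pool.emplace_back(worker, i, /*chunk_size=*/8, /*items_to_add=*/100);
      for (std::thread& t : pool) t.join();
      std::printf("slots claimed: %d\n", next_free.load());
      return 0;
    }
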
+ _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i, 0); + jint regions_added = parKnownGarbageCl.marked_regions_added(); + _hrSorted->incNumMarkedHeapRegions(regions_added); + if (G1PrintParCleanupStats) { + gclog_or_tty->print(" Thread %d called %d times, added %d regions to list.\n", + i, parKnownGarbageCl.invokes(), regions_added); + } + } +}; + +void +G1CollectorPolicy_BestRegionsFirst:: +record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes) { + double start; + if (G1PrintParCleanupStats) start = os::elapsedTime(); + record_concurrent_mark_cleanup_end_work1(freed_bytes, max_live_bytes); + + _collectionSetChooser->clearMarkedHeapRegions(); + double clear_marked_end; + if (G1PrintParCleanupStats) { + clear_marked_end = os::elapsedTime(); + gclog_or_tty->print_cr(" clear marked regions + work1: %8.3f ms.", + (clear_marked_end - start)*1000.0); + } + if (ParallelGCThreads > 0) { + const size_t OverpartitionFactor = 4; + const size_t MinChunkSize = 8; + const size_t ChunkSize = + MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor), + MinChunkSize); + _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(), + ChunkSize); + ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser, + (int) ChunkSize); + _g1->workers()->run_task(&parKnownGarbageTask); + } else { + KnownGarbageClosure knownGarbagecl(_collectionSetChooser); + _g1->heap_region_iterate(&knownGarbagecl); + } + double known_garbage_end; + if (G1PrintParCleanupStats) { + known_garbage_end = os::elapsedTime(); + gclog_or_tty->print_cr(" compute known garbage: %8.3f ms.", + (known_garbage_end - clear_marked_end)*1000.0); + } + _collectionSetChooser->sortMarkedHeapRegions(); + double sort_end; + if (G1PrintParCleanupStats) { + sort_end = os::elapsedTime(); + gclog_or_tty->print_cr(" sorting: %8.3f ms.", + (sort_end - known_garbage_end)*1000.0); + } + + record_concurrent_mark_cleanup_end_work2(); + double work2_end; + if (G1PrintParCleanupStats) { + work2_end = os::elapsedTime(); + gclog_or_tty->print_cr(" work2: %8.3f ms.", + (work2_end - sort_end)*1000.0); + } +} + +// Add the heap region to the collection set and return the conservative +// estimate of the number of live bytes. +void G1CollectorPolicy:: +add_to_collection_set(HeapRegion* hr) { + if (G1TraceRegions) { + gclog_or_tty->print_cr("added region to cset %d:["PTR_FORMAT", "PTR_FORMAT"], " + "top "PTR_FORMAT", young %s", + hr->hrs_index(), hr->bottom(), hr->end(), + hr->top(), (hr->is_young()) ? "YES" : "NO"); + } + + if (_g1->mark_in_progress()) + _g1->concurrent_mark()->registerCSetRegion(hr); + + assert(!hr->in_collection_set(), + "should not already be in the CSet"); + hr->set_in_collection_set(true); + hr->set_next_in_collection_set(_collection_set); + _collection_set = hr; + _collection_set_size++; + _collection_set_bytes_used_before += hr->used(); +} + +void +G1CollectorPolicy_BestRegionsFirst:: +choose_collection_set(HeapRegion* pop_region) { + double non_young_start_time_sec; + start_recording_regions(); + + if (pop_region != NULL) { + _target_pause_time_ms = (double) G1MaxPauseTimeMS; + } else { + guarantee(_target_pause_time_ms > -1.0, + "_target_pause_time_ms should have been set!"); + } + + // pop region is either null (and so is CS), or else it *is* the CS. 
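
The ChunkSize formula above over-partitions the region array, handing out roughly OverpartitionFactor chunks per GC worker (presumably for load balancing), while the MinChunkSize floor keeps claiming overhead bounded on small heaps. A worked example with assumed region and worker counts:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t OverpartitionFactor = 4;
      const std::size_t MinChunkSize = 8;
      std::size_t n_regions = 2048;   // assumed number of heap regions
      std::size_t workers   = 8;      // assumed ParallelGCThreads

      std::size_t chunk = std::max(n_regions / (workers * OverpartitionFactor),
                                   MinChunkSize);
      std::printf("chunk = %zu regions, about %zu chunks to claim\n",
                  chunk, n_regions / chunk);   // 64 regions per chunk, 32 chunks
      return 0;
    }
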
+ assert(_collection_set == pop_region, "Precondition"); + + double base_time_ms = predict_base_elapsed_time_ms(_pending_cards); + double predicted_pause_time_ms = base_time_ms; + + double target_time_ms = _target_pause_time_ms; + double time_remaining_ms = target_time_ms - base_time_ms; + + // the 10% and 50% values are arbitrary... + if (time_remaining_ms < 0.10*target_time_ms) { + time_remaining_ms = 0.50 * target_time_ms; + _within_target = false; + } else { + _within_target = true; + } + + // We figure out the number of bytes available for future to-space. + // For new regions without marking information, we must assume the + // worst-case of complete survival. If we have marking information for a + // region, we can bound the amount of live data. We can add a number of + // such regions, as long as the sum of the live data bounds does not + // exceed the available evacuation space. + size_t max_live_bytes = _g1->free_regions() * HeapRegion::GrainBytes; + + size_t expansion_bytes = + _g1->expansion_regions() * HeapRegion::GrainBytes; + + if (pop_region == NULL) { + _collection_set_bytes_used_before = 0; + _collection_set_size = 0; + } + + // Adjust for expansion and slop. + max_live_bytes = max_live_bytes + expansion_bytes; + + assert(pop_region != NULL || _g1->regions_accounted_for(), "Region leakage!"); + + HeapRegion* hr; + if (in_young_gc_mode()) { + double young_start_time_sec = os::elapsedTime(); + + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr("Adding %d young regions to the CSet", + _g1->young_list_length()); + } + _young_cset_length = 0; + _last_young_gc_full = full_young_gcs() ? true : false; + if (_last_young_gc_full) + ++_full_young_pause_num; + else + ++_partial_young_pause_num; + hr = _g1->pop_region_from_young_list(); + while (hr != NULL) { + + assert( hr->young_index_in_cset() == -1, "invariant" ); + assert( hr->age_in_surv_rate_group() != -1, "invariant" ); + hr->set_young_index_in_cset((int) _young_cset_length); + + ++_young_cset_length; + double predicted_time_ms = predict_region_elapsed_time_ms(hr, true); + time_remaining_ms -= predicted_time_ms; + predicted_pause_time_ms += predicted_time_ms; + if (hr == pop_region) { + // The popular region was young. Skip over it. + assert(hr->in_collection_set(), "It's the pop region."); + } else { + assert(!hr->in_collection_set(), "It's not the pop region."); + add_to_collection_set(hr); + record_cset_region(hr, true); + } + max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr(" Added [" PTR_FORMAT ", " PTR_FORMAT") to CS.", + hr->bottom(), hr->end()); + gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", + max_live_bytes/K); + } + hr = _g1->pop_region_from_young_list(); + } + + record_scan_only_regions(_g1->young_list_scan_only_length()); + + double young_end_time_sec = os::elapsedTime(); + _recorded_young_cset_choice_time_ms = + (young_end_time_sec - young_start_time_sec) * 1000.0; + + non_young_start_time_sec = os::elapsedTime(); + + if (_young_cset_length > 0 && _last_young_gc_full) { + // don't bother adding more regions... + goto choose_collection_set_end; + } + } else if (pop_region != NULL) { + // We're not in young mode, and we chose a popular region; don't choose + // any more. 
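
The selection logic above is a greedy, budget-driven loop: start from the predicted base cost, keep a time budget relative to the target (falling back to 50% of the target when less than 10% of it remains), and subtract each chosen region's predicted time from the budget. A stripped-down sketch of that shape; the cost function and all numbers are invented, and the real code treats young and non-young regions differently:

    #include <cstdio>
    #include <vector>

    // Stand-in for predict_region_elapsed_time_ms(); the coefficients are invented.
    double predict_region_ms(int rs_cards) { return 0.2 + rs_cards * 0.001; }

    int main() {
      double target_ms = 50.0;
      double base_ms   = 12.0;                  // predicted RS update + constant overhead
      double remaining = target_ms - base_ms;
      if (remaining < 0.10 * target_ms)         // almost no slack: work with half the target
        remaining = 0.50 * target_ms;

      std::vector<int> candidate_rs_cards = {900, 2500, 400, 7000, 1200};
      double predicted_pause_ms = base_ms;
      int chosen = 0;
      for (int cards : candidate_rs_cards) {
        double cost = predict_region_ms(cards);
        if (remaining < cost) break;            // budget exhausted
        remaining -= cost;
        predicted_pause_ms += cost;
        chosen++;
      }
      std::printf("chose %d regions, predicted pause %.2f ms\n",
                  chosen, predicted_pause_ms);
      return 0;
    }
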
+ return; + } + + if (!in_young_gc_mode() || !full_young_gcs()) { + bool should_continue = true; + NumberSeq seq; + double avg_prediction = 100000000000000000.0; // something very large + do { + hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, + avg_prediction); + if (hr != NULL && !hr->popular()) { + double predicted_time_ms = predict_region_elapsed_time_ms(hr, false); + time_remaining_ms -= predicted_time_ms; + predicted_pause_time_ms += predicted_time_ms; + add_to_collection_set(hr); + record_cset_region(hr, false); + max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); + if (G1PolicyVerbose > 0) { + gclog_or_tty->print_cr(" (" SIZE_FORMAT " KB left in heap.)", + max_live_bytes/K); + } + seq.add(predicted_time_ms); + avg_prediction = seq.avg() + seq.sd(); + } + should_continue = + ( hr != NULL) && + ( (adaptive_young_list_length()) ? time_remaining_ms > 0.0 + : _collection_set_size < _young_list_fixed_length ); + } while (should_continue); + + if (!adaptive_young_list_length() && + _collection_set_size < _young_list_fixed_length) + _should_revert_to_full_young_gcs = true; + } + +choose_collection_set_end: + count_CS_bytes_used(); + + end_recording_regions(); + + double non_young_end_time_sec = os::elapsedTime(); + _recorded_non_young_cset_choice_time_ms = + (non_young_end_time_sec - non_young_start_time_sec) * 1000.0; +} + +void G1CollectorPolicy_BestRegionsFirst::record_full_collection_end() { + G1CollectorPolicy::record_full_collection_end(); + _collectionSetChooser->updateAfterFullCollection(); +} + +void G1CollectorPolicy_BestRegionsFirst:: +expand_if_possible(size_t numRegions) { + size_t expansion_bytes = numRegions * HeapRegion::GrainBytes; + _g1->expand(expansion_bytes); +} + +void G1CollectorPolicy_BestRegionsFirst:: +record_collection_pause_end(bool popular, bool abandoned) { + G1CollectorPolicy::record_collection_pause_end(popular, abandoned); + assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1199 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A G1CollectorPolicy makes policy decisions that determine the +// characteristics of the collector. Examples include: +// * choice of collection set. 
+// * when to collect. + +class HeapRegion; +class CollectionSetChooser; + +// Yes, this is a bit unpleasant... but it saves replicating the same thing +// over and over again and introducing subtle problems through small typos and +// cutting and pasting mistakes. The macros below introduces a number +// sequnce into the following two classes and the methods that access it. + +#define define_num_seq(name) \ +private: \ + NumberSeq _all_##name##_times_ms; \ +public: \ + void record_##name##_time_ms(double ms) { \ + _all_##name##_times_ms.add(ms); \ + } \ + NumberSeq* get_##name##_seq() { \ + return &_all_##name##_times_ms; \ + } + +class MainBodySummary; +class PopPreambleSummary; + +class PauseSummary { + define_num_seq(total) + define_num_seq(other) + +public: + virtual MainBodySummary* main_body_summary() { return NULL; } + virtual PopPreambleSummary* pop_preamble_summary() { return NULL; } +}; + +class MainBodySummary { + define_num_seq(satb_drain) // optional + define_num_seq(parallel) // parallel only + define_num_seq(ext_root_scan) + define_num_seq(mark_stack_scan) + define_num_seq(scan_only) + define_num_seq(update_rs) + define_num_seq(scan_rs) + define_num_seq(scan_new_refs) // Only for temp use; added to + // in parallel case. + define_num_seq(obj_copy) + define_num_seq(termination) // parallel only + define_num_seq(parallel_other) // parallel only + define_num_seq(mark_closure) + define_num_seq(clear_ct) // parallel only +}; + +class PopPreambleSummary { + define_num_seq(pop_preamble) + define_num_seq(pop_update_rs) + define_num_seq(pop_scan_rs) + define_num_seq(pop_closure_app) + define_num_seq(pop_evacuation) + define_num_seq(pop_other) +}; + +class NonPopSummary: public PauseSummary, + public MainBodySummary { +public: + virtual MainBodySummary* main_body_summary() { return this; } +}; + +class PopSummary: public PauseSummary, + public MainBodySummary, + public PopPreambleSummary { +public: + virtual MainBodySummary* main_body_summary() { return this; } + virtual PopPreambleSummary* pop_preamble_summary() { return this; } +}; + +class NonPopAbandonedSummary: public PauseSummary { +}; + +class PopAbandonedSummary: public PauseSummary, + public PopPreambleSummary { +public: + virtual PopPreambleSummary* pop_preamble_summary() { return this; } +}; + +class G1CollectorPolicy: public CollectorPolicy { +protected: + // The number of pauses during the execution. + long _n_pauses; + + // either equal to the number of parallel threads, if ParallelGCThreads + // has been set, or 1 otherwise + int _parallel_gc_threads; + + enum SomePrivateConstants { + NumPrevPausesForHeuristics = 10, + NumPrevGCsForHeuristics = 10, + NumAPIs = HeapRegion::MaxAge + }; + + G1MMUTracker* _mmu_tracker; + + void initialize_flags(); + + void initialize_all() { + initialize_flags(); + initialize_size_info(); + initialize_perm_generation(PermGen::MarkSweepCompact); + } + + virtual size_t default_init_heap_size() { + // Pick some reasonable default. 
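
Since define_num_seq is the backbone of the summary classes above, it may help to see it exercised: the sketch below copies the macro's shape and expands it in a toy summary class, with a minimal NumberSeq stand-in (the real NumberSeq lives elsewhere in HotSpot):

    #include <cstdio>
    #include <vector>

    // Tiny stand-in for HotSpot's NumberSeq, just enough to exercise the macro.
    struct NumberSeq {
      std::vector<double> v;
      void add(double ms) { v.push_back(ms); }
      int num() const { return (int)v.size(); }
    };

    // Same shape as the define_num_seq macro in the header above.
    #define define_num_seq(name)                  \
    private:                                      \
      NumberSeq _all_##name##_times_ms;           \
    public:                                       \
      void record_##name##_time_ms(double ms) {   \
        _all_##name##_times_ms.add(ms);           \
      }                                           \
      NumberSeq* get_##name##_seq() {             \
        return &_all_##name##_times_ms;           \
      }

    struct DemoSummary {
      define_num_seq(total)   // generates record_total_time_ms() / get_total_seq()
      define_num_seq(other)   // generates record_other_time_ms() / get_other_seq()
    };

    int main() {
      DemoSummary s;
      s.record_total_time_ms(12.5);
      s.record_other_time_ms(1.5);
      std::printf("total samples recorded: %d\n", s.get_total_seq()->num());
      return 0;
    }
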
+ return 8*M; + } + + + double _cur_collection_start_sec; + size_t _cur_collection_pause_used_at_start_bytes; + size_t _cur_collection_pause_used_regions_at_start; + size_t _prev_collection_pause_used_at_end_bytes; + double _cur_collection_par_time_ms; + double _cur_satb_drain_time_ms; + double _cur_clear_ct_time_ms; + bool _satb_drain_time_set; + double _cur_popular_preamble_start_ms; + double _cur_popular_preamble_time_ms; + double _cur_popular_compute_rc_time_ms; + double _cur_popular_evac_time_ms; + + double _cur_CH_strong_roots_end_sec; + double _cur_CH_strong_roots_dur_ms; + double _cur_G1_strong_roots_end_sec; + double _cur_G1_strong_roots_dur_ms; + + // Statistics for recent GC pauses. See below for how indexed. + TruncatedSeq* _recent_CH_strong_roots_times_ms; + TruncatedSeq* _recent_G1_strong_roots_times_ms; + TruncatedSeq* _recent_evac_times_ms; + // These exclude marking times. + TruncatedSeq* _recent_pause_times_ms; + TruncatedSeq* _recent_gc_times_ms; + + TruncatedSeq* _recent_CS_bytes_used_before; + TruncatedSeq* _recent_CS_bytes_surviving; + + TruncatedSeq* _recent_rs_sizes; + + TruncatedSeq* _concurrent_mark_init_times_ms; + TruncatedSeq* _concurrent_mark_remark_times_ms; + TruncatedSeq* _concurrent_mark_cleanup_times_ms; + + NonPopSummary* _non_pop_summary; + PopSummary* _pop_summary; + NonPopAbandonedSummary* _non_pop_abandoned_summary; + PopAbandonedSummary* _pop_abandoned_summary; + + NumberSeq* _all_pause_times_ms; + NumberSeq* _all_full_gc_times_ms; + double _stop_world_start; + NumberSeq* _all_stop_world_times_ms; + NumberSeq* _all_yield_times_ms; + + size_t _region_num_young; + size_t _region_num_tenured; + size_t _prev_region_num_young; + size_t _prev_region_num_tenured; + + NumberSeq* _all_mod_union_times_ms; + + int _aux_num; + NumberSeq* _all_aux_times_ms; + double* _cur_aux_start_times_ms; + double* _cur_aux_times_ms; + bool* _cur_aux_times_set; + + double* _par_last_ext_root_scan_times_ms; + double* _par_last_mark_stack_scan_times_ms; + double* _par_last_scan_only_times_ms; + double* _par_last_scan_only_regions_scanned; + double* _par_last_update_rs_start_times_ms; + double* _par_last_update_rs_times_ms; + double* _par_last_update_rs_processed_buffers; + double* _par_last_scan_rs_start_times_ms; + double* _par_last_scan_rs_times_ms; + double* _par_last_scan_new_refs_times_ms; + double* _par_last_obj_copy_times_ms; + double* _par_last_termination_times_ms; + + // there are two pases during popular pauses, so we need to store + // somewhere the results of the first pass + double* _pop_par_last_update_rs_start_times_ms; + double* _pop_par_last_update_rs_times_ms; + double* _pop_par_last_update_rs_processed_buffers; + double* _pop_par_last_scan_rs_start_times_ms; + double* _pop_par_last_scan_rs_times_ms; + double* _pop_par_last_closure_app_times_ms; + + double _pop_compute_rc_start; + double _pop_evac_start; + + // indicates that we are in young GC mode + bool _in_young_gc_mode; + + // indicates whether we are in full young or partially young GC mode + bool _full_young_gcs; + + // if true, then it tries to dynamically adjust the length of the + // young list + bool _adaptive_young_list_length; + size_t _young_list_min_length; + size_t _young_list_target_length; + size_t _young_list_so_prefix_length; + size_t _young_list_fixed_length; + + size_t _young_cset_length; + bool _last_young_gc_full; + + double _target_pause_time_ms; + + unsigned _full_young_pause_num; + unsigned _partial_young_pause_num; + + bool _during_marking; + bool _in_marking_window; + bool 
_in_marking_window_im; + + SurvRateGroup* _short_lived_surv_rate_group; + SurvRateGroup* _survivor_surv_rate_group; + // add here any more surv rate groups + + bool during_marking() { + return _during_marking; + } + + // + +private: + enum PredictionConstants { + TruncatedSeqLength = 10 + }; + + TruncatedSeq* _alloc_rate_ms_seq; + double _prev_collection_pause_end_ms; + + TruncatedSeq* _pending_card_diff_seq; + TruncatedSeq* _rs_length_diff_seq; + TruncatedSeq* _cost_per_card_ms_seq; + TruncatedSeq* _cost_per_scan_only_region_ms_seq; + TruncatedSeq* _fully_young_cards_per_entry_ratio_seq; + TruncatedSeq* _partially_young_cards_per_entry_ratio_seq; + TruncatedSeq* _cost_per_entry_ms_seq; + TruncatedSeq* _partially_young_cost_per_entry_ms_seq; + TruncatedSeq* _cost_per_byte_ms_seq; + TruncatedSeq* _constant_other_time_ms_seq; + TruncatedSeq* _young_other_cost_per_region_ms_seq; + TruncatedSeq* _non_young_other_cost_per_region_ms_seq; + + TruncatedSeq* _pending_cards_seq; + TruncatedSeq* _scanned_cards_seq; + TruncatedSeq* _rs_lengths_seq; + + TruncatedSeq* _cost_per_byte_ms_during_cm_seq; + TruncatedSeq* _cost_per_scan_only_region_ms_during_cm_seq; + + TruncatedSeq* _young_gc_eff_seq; + + TruncatedSeq* _max_conc_overhead_seq; + + size_t _recorded_young_regions; + size_t _recorded_scan_only_regions; + size_t _recorded_non_young_regions; + size_t _recorded_region_num; + + size_t _free_regions_at_end_of_collection; + size_t _scan_only_regions_at_end_of_collection; + + size_t _recorded_rs_lengths; + size_t _max_rs_lengths; + + size_t _recorded_marked_bytes; + size_t _recorded_young_bytes; + + size_t _predicted_pending_cards; + size_t _predicted_cards_scanned; + size_t _predicted_rs_lengths; + size_t _predicted_bytes_to_copy; + + double _predicted_survival_ratio; + double _predicted_rs_update_time_ms; + double _predicted_rs_scan_time_ms; + double _predicted_scan_only_scan_time_ms; + double _predicted_object_copy_time_ms; + double _predicted_constant_other_time_ms; + double _predicted_young_other_time_ms; + double _predicted_non_young_other_time_ms; + double _predicted_pause_time_ms; + + double _vtime_diff_ms; + + double _recorded_young_free_cset_time_ms; + double _recorded_non_young_free_cset_time_ms; + + double _sigma; + double _expensive_region_limit_ms; + + size_t _rs_lengths_prediction; + + size_t _known_garbage_bytes; + double _known_garbage_ratio; + + double sigma() { + return _sigma; + } + + // A function that prevents us putting too much stock in small sample + // sets. Returns a number between 2.0 and 1.0, depending on the number + // of samples. 5 or more samples yields one; fewer scales linearly from + // 2.0 at 1 sample to 1.0 at 5. 
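
A worked illustration of the padding that confidence_factor and get_new_prediction (defined just below) apply when few samples are available, using a plain mean and standard deviation in place of TruncatedSeq's decayed statistics and an assumed sigma of 0.5:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <initializer_list>
    #include <vector>

    // Plain mean / standard deviation in place of TruncatedSeq's decaying variants.
    struct Seq {
      std::vector<double> v;
      void add(double d) { v.push_back(d); }
      int num() const { return (int)v.size(); }
      double avg() const {
        double s = 0; for (double d : v) s += d; return s / v.size();
      }
      double sd() const {
        double m = avg(), s = 0;
        for (double d : v) s += (d - m) * (d - m);
        return std::sqrt(s / v.size());
      }
    };

    static const double sigma = 0.5;   // assumed confidence parameter

    double confidence_factor(int samples) {
      if (samples > 4) return 1.0;
      return 1.0 + sigma * (5 - samples) / 2.0;
    }

    double get_new_prediction(const Seq& seq) {
      return std::max(seq.avg() + sigma * seq.sd(),
                      seq.avg() * confidence_factor(seq.num()));
    }

    int main() {
      Seq cost;
      for (double ms : {10.0, 12.0, 9.0, 11.0, 10.5, 10.2}) {
        cost.add(ms);
        std::printf("n=%d  prediction=%.2f ms\n", cost.num(), get_new_prediction(cost));
      }
      return 0;   // the padding above the mean shrinks as samples accumulate
    }
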
+ double confidence_factor(int samples) { + if (samples > 4) return 1.0; + else return 1.0 + sigma() * ((double)(5 - samples))/2.0; + } + + double get_new_neg_prediction(TruncatedSeq* seq) { + return seq->davg() - sigma() * seq->dsd(); + } + +#ifndef PRODUCT + bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group); +#endif // PRODUCT + +protected: + double _pause_time_target_ms; + double _recorded_young_cset_choice_time_ms; + double _recorded_non_young_cset_choice_time_ms; + bool _within_target; + size_t _pending_cards; + size_t _max_pending_cards; + +public: + + void set_region_short_lived(HeapRegion* hr) { + hr->install_surv_rate_group(_short_lived_surv_rate_group); + } + + void set_region_survivors(HeapRegion* hr) { + hr->install_surv_rate_group(_survivor_surv_rate_group); + } + +#ifndef PRODUCT + bool verify_young_ages(); +#endif // PRODUCT + + void tag_scan_only(size_t short_lived_scan_only_length); + + double get_new_prediction(TruncatedSeq* seq) { + return MAX2(seq->davg() + sigma() * seq->dsd(), + seq->davg() * confidence_factor(seq->num())); + } + + size_t young_cset_length() { + return _young_cset_length; + } + + void record_max_rs_lengths(size_t rs_lengths) { + _max_rs_lengths = rs_lengths; + } + + size_t predict_pending_card_diff() { + double prediction = get_new_neg_prediction(_pending_card_diff_seq); + if (prediction < 0.00001) + return 0; + else + return (size_t) prediction; + } + + size_t predict_pending_cards() { + size_t max_pending_card_num = _g1->max_pending_card_num(); + size_t diff = predict_pending_card_diff(); + size_t prediction; + if (diff > max_pending_card_num) + prediction = max_pending_card_num; + else + prediction = max_pending_card_num - diff; + + return prediction; + } + + size_t predict_rs_length_diff() { + return (size_t) get_new_prediction(_rs_length_diff_seq); + } + + double predict_alloc_rate_ms() { + return get_new_prediction(_alloc_rate_ms_seq); + } + + double predict_cost_per_card_ms() { + return get_new_prediction(_cost_per_card_ms_seq); + } + + double predict_rs_update_time_ms(size_t pending_cards) { + return (double) pending_cards * predict_cost_per_card_ms(); + } + + double predict_fully_young_cards_per_entry_ratio() { + return get_new_prediction(_fully_young_cards_per_entry_ratio_seq); + } + + double predict_partially_young_cards_per_entry_ratio() { + if (_partially_young_cards_per_entry_ratio_seq->num() < 2) + return predict_fully_young_cards_per_entry_ratio(); + else + return get_new_prediction(_partially_young_cards_per_entry_ratio_seq); + } + + size_t predict_young_card_num(size_t rs_length) { + return (size_t) ((double) rs_length * + predict_fully_young_cards_per_entry_ratio()); + } + + size_t predict_non_young_card_num(size_t rs_length) { + return (size_t) ((double) rs_length * + predict_partially_young_cards_per_entry_ratio()); + } + + double predict_rs_scan_time_ms(size_t card_num) { + if (full_young_gcs()) + return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq); + else + return predict_partially_young_rs_scan_time_ms(card_num); + } + + double predict_partially_young_rs_scan_time_ms(size_t card_num) { + if (_partially_young_cost_per_entry_ms_seq->num() < 3) + return (double) card_num * get_new_prediction(_cost_per_entry_ms_seq); + else + return (double) card_num * + get_new_prediction(_partially_young_cost_per_entry_ms_seq); + } + + double predict_scan_only_time_ms_during_cm(size_t scan_only_region_num) { + if (_cost_per_scan_only_region_ms_during_cm_seq->num() < 3) + return 1.5 * (double) 
scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_seq); + else + return (double) scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_during_cm_seq); + } + + double predict_scan_only_time_ms(size_t scan_only_region_num) { + if (_in_marking_window_im) + return predict_scan_only_time_ms_during_cm(scan_only_region_num); + else + return (double) scan_only_region_num * + get_new_prediction(_cost_per_scan_only_region_ms_seq); + } + + double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) { + if (_cost_per_byte_ms_during_cm_seq->num() < 3) + return 1.1 * (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_seq); + else + return (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_during_cm_seq); + } + + double predict_object_copy_time_ms(size_t bytes_to_copy) { + if (_in_marking_window && !_in_marking_window_im) + return predict_object_copy_time_ms_during_cm(bytes_to_copy); + else + return (double) bytes_to_copy * + get_new_prediction(_cost_per_byte_ms_seq); + } + + double predict_constant_other_time_ms() { + return get_new_prediction(_constant_other_time_ms_seq); + } + + double predict_young_other_time_ms(size_t young_num) { + return + (double) young_num * + get_new_prediction(_young_other_cost_per_region_ms_seq); + } + + double predict_non_young_other_time_ms(size_t non_young_num) { + return + (double) non_young_num * + get_new_prediction(_non_young_other_cost_per_region_ms_seq); + } + + void check_if_region_is_too_expensive(double predicted_time_ms); + + double predict_young_collection_elapsed_time_ms(size_t adjustment); + double predict_base_elapsed_time_ms(size_t pending_cards); + double predict_base_elapsed_time_ms(size_t pending_cards, + size_t scanned_cards); + size_t predict_bytes_to_copy(HeapRegion* hr); + double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); + + // for use by: calculate_optimal_so_length(length) + void predict_gc_eff(size_t young_region_num, + size_t so_length, + double base_time_ms, + double *gc_eff, + double *pause_time_ms); + + // for use by: calculate_young_list_target_config(rs_length) + bool predict_gc_eff(size_t young_region_num, + size_t so_length, + double base_time_with_so_ms, + size_t init_free_regions, + double target_pause_time_ms, + double* gc_eff); + + void start_recording_regions(); + void record_cset_region(HeapRegion* hr, bool young); + void record_scan_only_regions(size_t scan_only_length); + void end_recording_regions(); + + void record_vtime_diff_ms(double vtime_diff_ms) { + _vtime_diff_ms = vtime_diff_ms; + } + + void record_young_free_cset_time_ms(double time_ms) { + _recorded_young_free_cset_time_ms = time_ms; + } + + void record_non_young_free_cset_time_ms(double time_ms) { + _recorded_non_young_free_cset_time_ms = time_ms; + } + + double predict_young_gc_eff() { + return get_new_neg_prediction(_young_gc_eff_seq); + } + + // + +public: + void cset_regions_freed() { + bool propagate = _last_young_gc_full && !_in_marking_window; + _short_lived_surv_rate_group->all_surviving_words_recorded(propagate); + _survivor_surv_rate_group->all_surviving_words_recorded(propagate); + // also call it on any more surv rate groups + } + + void set_known_garbage_bytes(size_t known_garbage_bytes) { + _known_garbage_bytes = known_garbage_bytes; + size_t heap_bytes = _g1->capacity(); + _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes; + } + + void decrease_known_garbage_bytes(size_t known_garbage_bytes) { + guarantee( _known_garbage_bytes >= 
known_garbage_bytes, "invariant" ); + + _known_garbage_bytes -= known_garbage_bytes; + size_t heap_bytes = _g1->capacity(); + _known_garbage_ratio = (double) _known_garbage_bytes / (double) heap_bytes; + } + + G1MMUTracker* mmu_tracker() { + return _mmu_tracker; + } + + double predict_init_time_ms() { + return get_new_prediction(_concurrent_mark_init_times_ms); + } + + double predict_remark_time_ms() { + return get_new_prediction(_concurrent_mark_remark_times_ms); + } + + double predict_cleanup_time_ms() { + return get_new_prediction(_concurrent_mark_cleanup_times_ms); + } + + // Returns an estimate of the survival rate of the region at yg-age + // "yg_age". + double predict_yg_surv_rate(int age) { + TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age); + if (seq->num() == 0) + gclog_or_tty->print("BARF! age is %d", age); + guarantee( seq->num() > 0, "invariant" ); + double pred = get_new_prediction(seq); + if (pred > 1.0) + pred = 1.0; + return pred; + } + + double accum_yg_surv_rate_pred(int age) { + return _short_lived_surv_rate_group->accum_surv_rate_pred(age); + } + +protected: + void print_stats (int level, const char* str, double value); + void print_stats (int level, const char* str, int value); + void print_par_stats (int level, const char* str, double* data) { + print_par_stats(level, str, data, true); + } + void print_par_stats (int level, const char* str, double* data, bool summary); + void print_par_buffers (int level, const char* str, double* data, bool summary); + + void check_other_times(int level, + NumberSeq* other_times_ms, + NumberSeq* calc_other_times_ms) const; + + void print_summary (PauseSummary* stats) const; + void print_abandoned_summary(PauseSummary* non_pop_summary, + PauseSummary* pop_summary) const; + + void print_summary (int level, const char* str, NumberSeq* seq) const; + void print_summary_sd (int level, const char* str, NumberSeq* seq) const; + + double avg_value (double* data); + double max_value (double* data); + double sum_of_values (double* data); + double max_sum (double* data1, double* data2); + + int _last_satb_drain_processed_buffers; + int _last_update_rs_processed_buffers; + double _last_pause_time_ms; + + size_t _bytes_in_to_space_before_gc; + size_t _bytes_in_to_space_after_gc; + size_t bytes_in_to_space_during_gc() { + return + _bytes_in_to_space_after_gc - _bytes_in_to_space_before_gc; + } + size_t _bytes_in_collection_set_before_gc; + // Used to count used bytes in CS. + friend class CountCSClosure; + + // Statistics kept per GC stoppage, pause or full. + TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec; + + // We track markings. + int _num_markings; + double _mark_thread_startup_sec; // Time at startup of marking thread + + // Add a new GC of the given duration and end time to the record. + void update_recent_gc_times(double end_time_sec, double elapsed_ms); + + // The head of the list (via "next_in_collection_set()") representing the + // current collection set. + HeapRegion* _collection_set; + size_t _collection_set_size; + size_t _collection_set_bytes_used_before; + + // Info about marking. + int _n_marks; // Sticky at 2, so we know when we've done at least 2. + + // The number of collection pauses at the end of the last mark. + size_t _n_pauses_at_mark_end; + + // ==== This section is for stats related to starting Conc Refinement on time. + size_t _conc_refine_enabled; + size_t _conc_refine_zero_traversals; + size_t _conc_refine_max_traversals; + // In # of heap regions. 
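
predict_yg_surv_rate above caps a young region's predicted survival rate at 1.0, and predict_bytes_to_copy earlier in this file turns that rate into an expected copy volume (used bytes times the rate for the region's age). A small sketch of that chain; the survival-rate table and region size are invented:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // Assumed per-age survival-rate predictions (index = age in the surv rate group);
    // in G1 these come from per-age TruncatedSeq history, not a fixed table.
    static const double surv_rate_pred[] = {0.95, 0.60, 0.35, 0.20, 0.10};

    // Mirrors the clamp in predict_yg_surv_rate(): never predict more than 100% survival.
    double predict_yg_surv_rate(int age) {
      return std::min(surv_rate_pred[age], 1.0);
    }

    // Shape of predict_bytes_to_copy() for a young, unmarked region:
    // expected surviving bytes = used bytes * predicted survival rate for its age.
    std::size_t predict_bytes_to_copy(std::size_t used_bytes, int age) {
      return (std::size_t)(used_bytes * predict_yg_surv_rate(age));
    }

    int main() {
      std::size_t region_used = 1024 * 1024;   // 1 MB used in the region (assumed)
      for (int age = 0; age < 5; age++)
        std::printf("age %d -> copy about %zu KB\n",
                    age, predict_bytes_to_copy(region_used, age) / 1024);
      return 0;
    }
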
+ size_t _conc_refine_current_delta; + + // At the beginning of a collection pause, update the variables above, + // especially the "delta". + void update_conc_refine_data(); + // ==== + + // Stash a pointer to the g1 heap. + G1CollectedHeap* _g1; + + // The average time in ms per collection pause, averaged over recent pauses. + double recent_avg_time_for_pauses_ms(); + + // The average time in ms for processing CollectedHeap strong roots, per + // collection pause, averaged over recent pauses. + double recent_avg_time_for_CH_strong_ms(); + + // The average time in ms for processing the G1 remembered set, per + // pause, averaged over recent pauses. + double recent_avg_time_for_G1_strong_ms(); + + // The average time in ms for "evacuating followers", per pause, averaged + // over recent pauses. + double recent_avg_time_for_evac_ms(); + + // The number of "recent" GCs recorded in the number sequences + int number_of_recent_gcs(); + + // The average survival ratio, computed by the total number of bytes + // suriviving / total number of bytes before collection over the last + // several recent pauses. + double recent_avg_survival_fraction(); + // The survival fraction of the most recent pause; if there have been no + // pauses, returns 1.0. + double last_survival_fraction(); + + // Returns a "conservative" estimate of the recent survival rate, i.e., + // one that may be higher than "recent_avg_survival_fraction". + // This is conservative in several ways: + // If there have been few pauses, it will assume a potential high + // variance, and err on the side of caution. + // It puts a lower bound (currently 0.1) on the value it will return. + // To try to detect phase changes, if the most recent pause ("latest") has a + // higher-than average ("avg") survival rate, it returns that rate. + // "work" version is a utility function; young is restricted to young regions. + double conservative_avg_survival_fraction_work(double avg, + double latest); + + // The arguments are the two sequences that keep track of the number of bytes + // surviving and the total number of bytes before collection, resp., + // over the last evereal recent pauses + // Returns the survival rate for the category in the most recent pause. + // If there have been no pauses, returns 1.0. + double last_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before); + + // The arguments are the two sequences that keep track of the number of bytes + // surviving and the total number of bytes before collection, resp., + // over the last several recent pauses + // Returns the average survival ration over the last several recent pauses + // If there have been no pauses, return 1.0 + double recent_avg_survival_fraction_work(TruncatedSeq* surviving, + TruncatedSeq* before); + + double conservative_avg_survival_fraction() { + double avg = recent_avg_survival_fraction(); + double latest = last_survival_fraction(); + return conservative_avg_survival_fraction_work(avg, latest); + } + + // The ratio of gc time to elapsed time, computed over recent pauses. + double _recent_avg_pause_time_ratio; + + double recent_avg_pause_time_ratio() { + return _recent_avg_pause_time_ratio; + } + + // Number of pauses between concurrent marking. + size_t _pauses_btwn_concurrent_mark; + + size_t _n_marks_since_last_pause; + + // True iff CM has been initiated. 
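
The comments above summarize conservative_avg_survival_fraction_work, defined earlier in this file: start from the recent average, floor it from a small table while fewer than five pauses have been observed, take the latest pause's rate if it is higher, never go below 0.1, and cap at 1.0. A self-contained restatement of that clamping chain:

    #include <algorithm>
    #include <cstdio>

    static const int    survival_min_obs = 5;
    static const double survival_min_obs_limits[] = {0.9, 0.7, 0.5, 0.3, 0.1};
    static const double min_survival_rate = 0.1;

    double conservative_survival(double avg, double latest, int recent_gcs) {
      double res = avg;
      if (recent_gcs < survival_min_obs)               // few samples: assume high variance
        res = std::max(res, survival_min_obs_limits[recent_gcs]);
      res = std::max(res, latest);                     // react to a possible phase change
      res = std::max(res, min_survival_rate);          // lower bound
      return std::min(res, 1.0);                       // LAB waste / evac failure can exceed 1.0
    }

    int main() {
      std::printf("%.2f\n", conservative_survival(0.25, 0.40, 2));  // 0.50 (table floor wins)
      std::printf("%.2f\n", conservative_survival(0.25, 0.40, 8));  // 0.40 (latest wins)
      return 0;
    }
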
+ bool _conc_mark_initiated; + + // True iff CM should be initiated + bool _should_initiate_conc_mark; + bool _should_revert_to_full_young_gcs; + bool _last_full_young_gc; + + // This set of variables tracks the collector efficiency, in order to + // determine whether we should initiate a new marking. + double _cur_mark_stop_world_time_ms; + double _mark_init_start_sec; + double _mark_remark_start_sec; + double _mark_cleanup_start_sec; + double _mark_closure_time_ms; + + void calculate_young_list_min_length(); + void calculate_young_list_target_config(); + void calculate_young_list_target_config(size_t rs_lengths); + size_t calculate_optimal_so_length(size_t young_list_length); + +public: + + G1CollectorPolicy(); + + virtual G1CollectorPolicy* as_g1_policy() { return this; } + + virtual CollectorPolicy::Name kind() { + return CollectorPolicy::G1CollectorPolicyKind; + } + + void check_prediction_validity(); + + size_t bytes_in_collection_set() { + return _bytes_in_collection_set_before_gc; + } + + size_t bytes_in_to_space() { + return bytes_in_to_space_during_gc(); + } + + unsigned calc_gc_alloc_time_stamp() { + return _all_pause_times_ms->num() + 1; + } + +protected: + + // Count the number of bytes used in the CS. + void count_CS_bytes_used(); + + // Together these do the base cleanup-recording work. Subclasses might + // want to put something between them. + void record_concurrent_mark_cleanup_end_work1(size_t freed_bytes, + size_t max_live_bytes); + void record_concurrent_mark_cleanup_end_work2(); + +public: + + virtual void init(); + + virtual HeapWord* mem_allocate_work(size_t size, + bool is_tlab, + bool* gc_overhead_limit_was_exceeded); + + // This method controls how a collector handles one or more + // of its generations being fully allocated. + virtual HeapWord* satisfy_failed_allocation(size_t size, + bool is_tlab); + + BarrierSet::Name barrier_set_name() { return BarrierSet::G1SATBCTLogging; } + + GenRemSet::Name rem_set_name() { return GenRemSet::CardTable; } + + // The number of collection pauses so far. + long n_pauses() const { return _n_pauses; } + + // Update the heuristic info to record a collection pause of the given + // start time, where the given number of bytes were used at the start. + // This may involve changing the desired size of a collection set. + + virtual void record_stop_world_start(); + + virtual void record_collection_pause_start(double start_time_sec, + size_t start_used); + + virtual void record_popular_pause_preamble_start(); + virtual void record_popular_pause_preamble_end(); + + // Must currently be called while the world is stopped. + virtual void record_concurrent_mark_init_start(); + virtual void record_concurrent_mark_init_end(); + void record_concurrent_mark_init_end_pre(double + mark_init_elapsed_time_ms); + + void record_mark_closure_time(double mark_closure_time_ms); + + virtual void record_concurrent_mark_remark_start(); + virtual void record_concurrent_mark_remark_end(); + + virtual void record_concurrent_mark_cleanup_start(); + virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes); + virtual void record_concurrent_mark_cleanup_completed(); + + virtual void record_concurrent_pause(); + virtual void record_concurrent_pause_end(); + + virtual void record_collection_pause_end_CH_strong_roots(); + virtual void record_collection_pause_end_G1_strong_roots(); + + virtual void record_collection_pause_end(bool popular, bool abandoned); + + // Record the fact that a full collection occurred. 
+ virtual void record_full_collection_start(); + virtual void record_full_collection_end(); + + void record_ext_root_scan_time(int worker_i, double ms) { + _par_last_ext_root_scan_times_ms[worker_i] = ms; + } + + void record_mark_stack_scan_time(int worker_i, double ms) { + _par_last_mark_stack_scan_times_ms[worker_i] = ms; + } + + void record_scan_only_time(int worker_i, double ms, int n) { + _par_last_scan_only_times_ms[worker_i] = ms; + _par_last_scan_only_regions_scanned[worker_i] = (double) n; + } + + void record_satb_drain_time(double ms) { + _cur_satb_drain_time_ms = ms; + _satb_drain_time_set = true; + } + + void record_satb_drain_processed_buffers (int processed_buffers) { + _last_satb_drain_processed_buffers = processed_buffers; + } + + void record_mod_union_time(double ms) { + _all_mod_union_times_ms->add(ms); + } + + void record_update_rs_start_time(int thread, double ms) { + _par_last_update_rs_start_times_ms[thread] = ms; + } + + void record_update_rs_time(int thread, double ms) { + _par_last_update_rs_times_ms[thread] = ms; + } + + void record_update_rs_processed_buffers (int thread, + double processed_buffers) { + _par_last_update_rs_processed_buffers[thread] = processed_buffers; + } + + void record_scan_rs_start_time(int thread, double ms) { + _par_last_scan_rs_start_times_ms[thread] = ms; + } + + void record_scan_rs_time(int thread, double ms) { + _par_last_scan_rs_times_ms[thread] = ms; + } + + void record_scan_new_refs_time(int thread, double ms) { + _par_last_scan_new_refs_times_ms[thread] = ms; + } + + double get_scan_new_refs_time(int thread) { + return _par_last_scan_new_refs_times_ms[thread]; + } + + void reset_obj_copy_time(int thread) { + _par_last_obj_copy_times_ms[thread] = 0.0; + } + + void reset_obj_copy_time() { + reset_obj_copy_time(0); + } + + void record_obj_copy_time(int thread, double ms) { + _par_last_obj_copy_times_ms[thread] += ms; + } + + void record_obj_copy_time(double ms) { + record_obj_copy_time(0, ms); + } + + void record_termination_time(int thread, double ms) { + _par_last_termination_times_ms[thread] = ms; + } + + void record_termination_time(double ms) { + record_termination_time(0, ms); + } + + void record_pause_time(double ms) { + _last_pause_time_ms = ms; + } + + void record_clear_ct_time(double ms) { + _cur_clear_ct_time_ms = ms; + } + + void record_par_time(double ms) { + _cur_collection_par_time_ms = ms; + } + + void record_aux_start_time(int i) { + guarantee(i < _aux_num, "should be within range"); + _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0; + } + + void record_aux_end_time(int i) { + guarantee(i < _aux_num, "should be within range"); + double ms = os::elapsedTime() * 1000.0 - _cur_aux_start_times_ms[i]; + _cur_aux_times_set[i] = true; + _cur_aux_times_ms[i] += ms; + } + + void record_pop_compute_rc_start(); + void record_pop_compute_rc_end(); + + void record_pop_evac_start(); + void record_pop_evac_end(); + + // Record the fact that "bytes" bytes allocated in a region. + void record_before_bytes(size_t bytes); + void record_after_bytes(size_t bytes); + + // Returns "true" if this is a good time to do a collection pause. + // The "word_size" argument, if non-zero, indicates the size of an + // allocation request that is prompting this query. + virtual bool should_do_collection_pause(size_t word_size) = 0; + + // Choose a new collection set. Marks the chosen regions as being + // "in_collection_set", and links them together. The head and number of + // the collection set are available via access methods. 
+ // If "pop_region" is non-NULL, it is a popular region that has already + // been added to the collection set. + virtual void choose_collection_set(HeapRegion* pop_region = NULL) = 0; + + void clear_collection_set() { _collection_set = NULL; } + + // The head of the list (via "next_in_collection_set()") representing the + // current collection set. + HeapRegion* collection_set() { return _collection_set; } + + // Sets the collection set to the given single region. + virtual void set_single_region_collection_set(HeapRegion* hr); + + // The number of elements in the current collection set. + size_t collection_set_size() { return _collection_set_size; } + + // Add "hr" to the CS. + void add_to_collection_set(HeapRegion* hr); + + bool should_initiate_conc_mark() { return _should_initiate_conc_mark; } + void set_should_initiate_conc_mark() { _should_initiate_conc_mark = true; } + void unset_should_initiate_conc_mark(){ _should_initiate_conc_mark = false; } + + void checkpoint_conc_overhead(); + + // If an expansion would be appropriate, because recent GC overhead had + // exceeded the desired limit, return an amount to expand by. + virtual size_t expansion_amount(); + + // note start of mark thread + void note_start_of_mark_thread(); + + // The marked bytes of "r" have changed; reclassify its desirability + // for marking. Also asserts that "r" is eligible for a CS. + virtual void note_change_in_marked_bytes(HeapRegion* r) = 0; + +#ifndef PRODUCT + // Check any appropriate marked bytes info, asserting false if + // something's wrong, else returning "true". + virtual bool assertMarkedBytesDataOK() = 0; +#endif + + // Print tracing information. + void print_tracing_info() const; + + // Print stats on young survival ratio + void print_yg_surv_rate_info() const; + + void finished_recalculating_age_indexes() { + _short_lived_surv_rate_group->finished_recalculating_age_indexes(); + // do that for any other surv rate groups + } + + bool should_add_next_region_to_young_list(); + + bool in_young_gc_mode() { + return _in_young_gc_mode; + } + void set_in_young_gc_mode(bool in_young_gc_mode) { + _in_young_gc_mode = in_young_gc_mode; + } + + bool full_young_gcs() { + return _full_young_gcs; + } + void set_full_young_gcs(bool full_young_gcs) { + _full_young_gcs = full_young_gcs; + } + + bool adaptive_young_list_length() { + return _adaptive_young_list_length; + } + void set_adaptive_young_list_length(bool adaptive_young_list_length) { + _adaptive_young_list_length = adaptive_young_list_length; + } + + inline double get_gc_eff_factor() { + double ratio = _known_garbage_ratio; + + double square = ratio * ratio; + // square = square * square; + double ret = square * 9.0 + 1.0; +#if 0 + gclog_or_tty->print_cr("ratio = %1.2lf, ret = %1.2lf", ratio, ret); +#endif // 0 + guarantee(0.0 <= ret && ret < 10.0, "invariant!"); + return ret; + } + + // + // Survivor regions policy. + // +protected: + + // Current tenuring threshold, set to 0 if the collector reaches the + // maximum number of survivor regions.
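// (A threshold of 0 therefore routes every evacuated object to a tenured
// allocation region; see evacuation_destination() and
// note_alloc_region_limit_reached() below.)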
+ int _tenuring_threshold; + +public: + + inline GCAllocPurpose + evacuation_destination(HeapRegion* src_region, int age, size_t word_sz) { + if (age < _tenuring_threshold && src_region->is_young()) { + return GCAllocForSurvived; + } else { + return GCAllocForTenured; + } + } + + inline bool track_object_age(GCAllocPurpose purpose) { + return purpose == GCAllocForSurvived; + } + + inline GCAllocPurpose alternative_purpose(int purpose) { + return GCAllocForTenured; + } + + uint max_regions(int purpose); + + // The limit on regions for a particular purpose is reached. + void note_alloc_region_limit_reached(int purpose) { + if (purpose == GCAllocForSurvived) { + _tenuring_threshold = 0; + } + } + + void note_start_adding_survivor_regions() { + _survivor_surv_rate_group->start_adding_regions(); + } + + void note_stop_adding_survivor_regions() { + _survivor_surv_rate_group->stop_adding_regions(); + } +}; + +// This encapsulates a particular strategy for a g1 Collector. +// +// Start a concurrent mark when our heap size is n bytes +// greater than our heap size was at the last concurrent +// mark, where n is a function of the CMSTriggerRatio +// and the MinHeapFreeRatio. +// +// Start a g1 collection pause when we have allocated the +// average number of bytes currently being freed in +// a collection, but only if it is at least one region +// full. +// +// Resize Heap based on desired +// allocation space, where desired allocation space is +// a function of survival rate and desired future to size. +// +// Choose collection set by first picking all older regions +// which have a survival rate that beats our projected young +// survival rate. Then fill out the number of needed regions +// with young regions. + +class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy { + CollectionSetChooser* _collectionSetChooser; + // If the estimate is less than desirable, resize if possible. + void expand_if_possible(size_t numRegions); + + virtual void choose_collection_set(HeapRegion* pop_region = NULL); + virtual void record_collection_pause_start(double start_time_sec, + size_t start_used); + virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes, + size_t max_live_bytes); + virtual void record_full_collection_end(); + +public: + G1CollectorPolicy_BestRegionsFirst() { + _collectionSetChooser = new CollectionSetChooser(); + } + void record_collection_pause_end(bool popular, bool abandoned); + bool should_do_collection_pause(size_t word_size); + virtual void set_single_region_collection_set(HeapRegion* hr); + // This is not needed any more, after the CSet choosing code was + // changed to use the pause prediction work. But let's leave the + // hook in just in case. + void note_change_in_marked_bytes(HeapRegion* r) { } +#ifndef PRODUCT + bool assertMarkedBytesDataOK(); +#endif +}; + +// This should move to some place more general... + +// If we have "n" measurements, and we've kept track of their "sum" and the +// "sum_of_squares" of the measurements, this returns the variance of the +// sequence.
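// Equivalently, variance = sum_of_squares / n - (sum / n)^2, i.e. E[x^2] - E[x]^2.
// A quick illustrative check (made-up values): for the measurements {1.0, 2.0, 3.0}
// we have n = 3, sum = 6.0, sum_of_squares = 14.0, so
//   variance(3, 14.0, 6.0) == (14.0 - 2.0 * 2.0 * 6.0 + 3.0 * 2.0 * 2.0) / 3.0 == 2.0 / 3.0,
// which matches 14.0 / 3.0 - 2.0 * 2.0.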
+inline double variance(int n, double sum_of_squares, double sum) { + double n_d = (double)n; + double avg = sum/n_d; + return (sum_of_squares - 2.0 * avg * sum + n_d * avg * avg) / n_d; +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MMUTracker.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,187 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1MMUTracker.cpp.incl" + +#define _DISABLE_MMU 0 + +// can't rely on comparing doubles with tolerating a small margin for error +#define SMALL_MARGIN 0.0000001 +#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN ) +#define is_double_leq(_val1, _val2) is_double_leq_0((_val1) - (_val2)) +#define is_double_geq(_val1, _val2) is_double_leq_0((_val2) - (_val1)) + +/***** ALL TIMES ARE IN SECS!!!!!!! *****/ + +G1MMUTracker::G1MMUTracker(double time_slice, double max_gc_time) : + _time_slice(time_slice), + _max_gc_time(max_gc_time), + _conc_overhead_time_sec(0.0) { } + +void +G1MMUTracker::update_conc_overhead(double conc_overhead) { + double conc_overhead_time_sec = _time_slice * conc_overhead; + if (conc_overhead_time_sec > 0.9 * _max_gc_time) { + // We are screwed, as we only seem to have <10% of the soft + // real-time goal available for pauses. Let's admit defeat and + // allow something more generous as a pause target. 
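// (Illustrative numbers only: with a 20 ms time slice, a 10 ms max GC time and a
// measured concurrent overhead of 0.55, conc_overhead_time_sec would be 11 ms,
// which exceeds 0.9 * 10 ms, so it is clamped to 7.5 ms below; max_gc_time()
// then leaves 2.5 ms of the goal for pauses.)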
+ conc_overhead_time_sec = 0.75 * _max_gc_time; + } + + _conc_overhead_time_sec = conc_overhead_time_sec; +} + +G1MMUTrackerQueue::G1MMUTrackerQueue(double time_slice, double max_gc_time) : + G1MMUTracker(time_slice, max_gc_time), + _head_index(0), + _tail_index(trim_index(_head_index+1)), + _no_entries(0) { } + +void G1MMUTrackerQueue::remove_expired_entries(double current_time) { + double limit = current_time - _time_slice; + while (_no_entries > 0) { + if (is_double_geq(limit, _array[_tail_index].end_time())) { + _tail_index = trim_index(_tail_index + 1); + --_no_entries; + } else + return; + } + guarantee(_no_entries == 0, "should have no entries in the array"); +} + +double G1MMUTrackerQueue::calculate_gc_time(double current_time) { + double gc_time = 0.0; + double limit = current_time - _time_slice; + for (int i = 0; i < _no_entries; ++i) { + int index = trim_index(_tail_index + i); + G1MMUTrackerQueueElem *elem = &_array[index]; + if (elem->end_time() > limit) { + if (elem->start_time() > limit) + gc_time += elem->duration(); + else + gc_time += elem->end_time() - limit; + } + } + return gc_time; +} + +void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) { + double longest_allowed = longest_pause_internal(start); + if (longest_allowed < 0.0) + longest_allowed = 0.0; + double duration = end - start; + + remove_expired_entries(end); + if (_no_entries == QueueLength) { + // OK, right now when we fill up we bomb out + // there are a few ways of dealing with this "gracefully" + // increase the array size (:-) + // remove the oldest entry (this might allow more GC time for + // the time slice than what's allowed) + // concolidate the two entries with the minimum gap between them + // (this mighte allow less GC time than what's allowed) + guarantee(0, "array full, currently we can't recover"); + } + _head_index = trim_index(_head_index + 1); + ++_no_entries; + _array[_head_index] = G1MMUTrackerQueueElem(start, end); +} + +// basically the _internal call does not remove expired entries +// this is for trying things out in the future and a couple +// of other places (debugging) + +double G1MMUTrackerQueue::longest_pause(double current_time) { + if (_DISABLE_MMU) + return _max_gc_time; + + MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag); + remove_expired_entries(current_time); + + return longest_pause_internal(current_time); +} + +double G1MMUTrackerQueue::longest_pause_internal(double current_time) { + double target_time = _max_gc_time; + + while( 1 ) { + double gc_time = + calculate_gc_time(current_time + target_time) + _conc_overhead_time_sec; + double diff = target_time + gc_time - _max_gc_time; + if (!is_double_leq_0(diff)) { + target_time -= diff; + if (is_double_leq_0(target_time)) { + target_time = -1.0; + break; + } + } else { + break; + } + } + + return target_time; +} + +// basically the _internal call does not remove expired entries +// this is for trying things out in the future and a couple +// of other places (debugging) + +double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) { + if (_DISABLE_MMU) + return 0.0; + + MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag); + remove_expired_entries(current_time); + + return when_internal(current_time, pause_time); +} + +double G1MMUTrackerQueue::when_internal(double current_time, + double pause_time) { + // if the pause is over the maximum, just assume that it's the maximum + double adjusted_pause_time = + (pause_time > max_gc_time()) ? 
max_gc_time() : pause_time; + double earliest_end = current_time + adjusted_pause_time; + double limit = earliest_end - _time_slice; + double gc_time = calculate_gc_time(earliest_end); + double diff = gc_time + adjusted_pause_time - max_gc_time(); + if (is_double_leq_0(diff)) + return 0.0; + + int index = _tail_index; + while ( 1 ) { + G1MMUTrackerQueueElem *elem = &_array[index]; + if (elem->end_time() > limit) { + if (elem->start_time() > limit) + diff -= elem->duration(); + else + diff -= elem->end_time() - limit; + if (is_double_leq_0(diff)) + return elem->end_time() + diff + _time_slice - adjusted_pause_time - current_time; + } + index = trim_index(index+1); + guarantee(index != trim_index(_head_index + 1), "should not go past head"); + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MMUTracker.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,130 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Keeps track of the GC work and decides when it is OK to do GC work +// and for how long so that the MMU invariants are maintained. + +/***** ALL TIMES ARE IN SECS!!!!!!! 
*****/ + +// this is the "interface" +class G1MMUTracker { +protected: + double _time_slice; + double _max_gc_time; // this is per time slice + + double _conc_overhead_time_sec; + +public: + G1MMUTracker(double time_slice, double max_gc_time); + + void update_conc_overhead(double conc_overhead); + + virtual void add_pause(double start, double end, bool gc_thread) = 0; + virtual double longest_pause(double current_time) = 0; + virtual double when_sec(double current_time, double pause_time) = 0; + + double max_gc_time() { + return _max_gc_time - _conc_overhead_time_sec; + } + + inline bool now_max_gc(double current_time) { + return when_sec(current_time, max_gc_time()) < 0.00001; + } + + inline double when_max_gc_sec(double current_time) { + return when_sec(current_time, max_gc_time()); + } + + inline jlong when_max_gc_ms(double current_time) { + double when = when_max_gc_sec(current_time); + return (jlong) (when * 1000.0); + } + + inline jlong when_ms(double current_time, double pause_time) { + double when = when_sec(current_time, pause_time); + return (jlong) (when * 1000.0); + } +}; + +class G1MMUTrackerQueueElem { +private: + double _start_time; + double _end_time; + +public: + inline double start_time() { return _start_time; } + inline double end_time() { return _end_time; } + inline double duration() { return _end_time - _start_time; } + + G1MMUTrackerQueueElem() { + _start_time = 0.0; + _end_time = 0.0; + } + + G1MMUTrackerQueueElem(double start_time, double end_time) { + _start_time = start_time; + _end_time = end_time; + } +}; + +// this is an implementation of the MMUTracker using a (fixed-size) queue +// that keeps track of all the recent pause times +class G1MMUTrackerQueue: public G1MMUTracker { +private: + enum PrivateConstants { + QueueLength = 64 + }; + + // The array keeps track of all the pauses that fall within a time + // slice (the last time slice during which pauses took place). + // The data structure implemented is a circular queue. + // Head "points" to the most recent addition, tail to the oldest one. + // The array is of fixed size and I don't think we'll need more than + // two or three entries with the current behaviour of G1 pauses. + // If the array is full, an easy fix is to look for the pauses with + // the shortest gap between them and concolidate them. + + G1MMUTrackerQueueElem _array[QueueLength]; + int _head_index; + int _tail_index; + int _no_entries; + + inline int trim_index(int index) { + return (index + QueueLength) % QueueLength; + } + + void remove_expired_entries(double current_time); + double calculate_gc_time(double current_time); + + double longest_pause_internal(double current_time); + double when_internal(double current_time, double pause_time); + +public: + G1MMUTrackerQueue(double time_slice, double max_gc_time); + + virtual void add_pause(double start, double end, bool gc_thread); + + virtual double longest_pause(double current_time); + virtual double when_sec(double current_time, double pause_time); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MarkSweep.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,381 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1MarkSweep.cpp.incl" + +class HeapRegion; + +void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, + bool clear_all_softrefs) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); + + // hook up weak ref data so it can be used during Mark-Sweep + assert(GenMarkSweep::ref_processor() == NULL, "no stomping"); + GenMarkSweep::_ref_processor = rp; + assert(rp != NULL, "should be non-NULL"); + + // When collecting the permanent generation methodOops may be moving, + // so we either have to flush all bcp data or convert it into bci. + CodeCache::gc_prologue(); + Threads::gc_prologue(); + + // Increment the invocation count for the permanent generation, since it is + // implicitly collected whenever we do a full mark sweep collection. + SharedHeap* sh = SharedHeap::heap(); + sh->perm_gen()->stat_record()->invocations++; + + bool marked_for_unloading = false; + + allocate_stacks(); + + mark_sweep_phase1(marked_for_unloading, clear_all_softrefs); + + if (G1VerifyConcMark) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + g1h->checkConcurrentMark(); + } + + mark_sweep_phase2(); + + // Don't add any more derived pointers during phase3 + COMPILER2_PRESENT(DerivedPointerTable::set_active(false)); + + mark_sweep_phase3(); + + mark_sweep_phase4(); + + GenMarkSweep::restore_marks(); + + GenMarkSweep::deallocate_stacks(); + + // We must invalidate the perm-gen rs, so that it gets rebuilt. + GenRemSet* rs = sh->rem_set(); + rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/); + + // "free at last gc" is calculated from these. + // CHF: cheating for now!!! 
+ // Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity()); + // Universe::set_heap_used_at_last_gc(Universe::heap()->used()); + + Threads::gc_epilogue(); + CodeCache::gc_epilogue(); + + // refs processing: clean slate + GenMarkSweep::_ref_processor = NULL; +} + + +void G1MarkSweep::allocate_stacks() { + GenMarkSweep::_preserved_count_max = 0; + GenMarkSweep::_preserved_marks = NULL; + GenMarkSweep::_preserved_count = 0; + GenMarkSweep::_preserved_mark_stack = NULL; + GenMarkSweep::_preserved_oop_stack = NULL; + + GenMarkSweep::_marking_stack = + new (ResourceObj::C_HEAP) GrowableArray(4000, true); + + size_t size = SystemDictionary::number_of_classes() * 2; + GenMarkSweep::_revisit_klass_stack = + new (ResourceObj::C_HEAP) GrowableArray((int)size, true); +} + +void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, + bool clear_all_softrefs) { + // Recursively traverse all live objects and mark them + EventMark m("1 mark object"); + TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace(" 1"); + + SharedHeap* sh = SharedHeap::heap(); + + sh->process_strong_roots(true, // Collecting permanent generation. + SharedHeap::SO_SystemClasses, + &GenMarkSweep::follow_root_closure, + &GenMarkSweep::follow_root_closure); + + // Process reference objects found during marking + ReferencePolicy *soft_ref_policy; + if (clear_all_softrefs) { + soft_ref_policy = new AlwaysClearPolicy(); + } else { +#ifdef COMPILER2 + soft_ref_policy = new LRUMaxHeapPolicy(); +#else + soft_ref_policy = new LRUCurrentHeapPolicy(); +#endif + } + assert(soft_ref_policy != NULL,"No soft reference policy"); + GenMarkSweep::ref_processor()->process_discovered_references( + soft_ref_policy, + &GenMarkSweep::is_alive, + &GenMarkSweep::keep_alive, + &GenMarkSweep::follow_stack_closure, + NULL); + + // Follow system dictionary roots and unload classes + bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive); + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); + + // Follow code cache roots (has to be done after system dictionary, + // assumes all live klasses are marked) + CodeCache::do_unloading(&GenMarkSweep::is_alive, + &GenMarkSweep::keep_alive, + purged_class); + GenMarkSweep::follow_stack(); + + // Update subklass/sibling/implementor links of live klasses + GenMarkSweep::follow_weak_klass_links(); + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); + + // Visit symbol and interned string tables and delete unmarked oops + SymbolTable::unlink(&GenMarkSweep::is_alive); + StringTable::unlink(&GenMarkSweep::is_alive); + + assert(GenMarkSweep::_marking_stack->is_empty(), + "stack should be empty by now"); +} + +class G1PrepareCompactClosure: public HeapRegionClosure { + ModRefBarrierSet* _mrbs; + CompactPoint _cp; + bool _popular_only; + + void free_humongous_region(HeapRegion* hr) { + HeapWord* bot = hr->bottom(); + HeapWord* end = hr->end(); + assert(hr->startsHumongous(), + "Only the start of a humongous region should be freed."); + G1CollectedHeap::heap()->free_region(hr); + hr->prepare_for_compaction(&_cp); + // Also clear the part of the card table that will be unused after + // compaction. 
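// (Everything between compaction_top() and end() will be free space once the
// region has been compacted, so any dirty cards covering that range can simply
// be discarded.)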
+ _mrbs->clear(MemRegion(hr->compaction_top(), hr->end())); + } + +public: + G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) : + _cp(NULL, cs, cs->initialize_threshold()), + _mrbs(G1CollectedHeap::heap()->mr_bs()), + _popular_only(popular_only) + {} + bool doHeapRegion(HeapRegion* hr) { + if (_popular_only && !hr->popular()) + return true; // terminate early + else if (!_popular_only && hr->popular()) + return false; // skip this one. + + if (hr->isHumongous()) { + if (hr->startsHumongous()) { + oop obj = oop(hr->bottom()); + if (obj->is_gc_marked()) { + obj->forward_to(obj); + } else { + free_humongous_region(hr); + } + } else { + assert(hr->continuesHumongous(), "Invalid humongous."); + } + } else { + hr->prepare_for_compaction(&_cp); + // Also clear the part of the card table that will be unused after + // compaction. + _mrbs->clear(MemRegion(hr->compaction_top(), hr->end())); + } + return false; + } +}; +// Stolen verbatim from g1CollectedHeap.cpp +class FindFirstRegionClosure: public HeapRegionClosure { + HeapRegion* _a_region; + bool _find_popular; +public: + FindFirstRegionClosure(bool find_popular) : + _a_region(NULL), _find_popular(find_popular) {} + bool doHeapRegion(HeapRegion* r) { + if (r->popular() == _find_popular) { + _a_region = r; + return true; + } else { + return false; + } + } + HeapRegion* result() { return _a_region; } +}; + +void G1MarkSweep::mark_sweep_phase2() { + // Now all live objects are marked, compute the new object addresses. + + // It is imperative that we traverse perm_gen LAST. If dead space is + // allowed a range of dead object may get overwritten by a dead int + // array. If perm_gen is not traversed last a klassOop may get + // overwritten. This is fine since it is dead, but if the class has dead + // instances we have to skip them, and in order to find their size we + // need the klassOop! + // + // It is not required that we traverse spaces in the same order in + // phase2, phase3 and phase4, but the ValidateMarkSweep live oops + // tracking expects us to do so. See comment under phase4. + + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + EventMark m("2 compute new addresses"); + TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("2"); + + // First we compact the popular regions. + if (G1NumPopularRegions > 0) { + CompactibleSpace* sp = g1h->first_compactible_space(); + FindFirstRegionClosure cl(true /*find_popular*/); + g1h->heap_region_iterate(&cl); + HeapRegion *r = cl.result(); + assert(r->popular(), "should have found a popular region."); + assert(r == sp, "first popular heap region should " + "== first compactible space"); + G1PrepareCompactClosure blk(sp, true/*popular_only*/); + g1h->heap_region_iterate(&blk); + } + + // Now we do the regular regions. 
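// (The prepare-compact closure below is reused with popular_only == false, so the
// popular regions already compacted above are skipped on this pass.)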
+ FindFirstRegionClosure cl(false /*find_popular*/); + g1h->heap_region_iterate(&cl); + HeapRegion *r = cl.result(); + assert(!r->popular(), "should have founda non-popular region."); + CompactibleSpace* sp = r; + if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) { + sp = r->next_compaction_space(); + } + + G1PrepareCompactClosure blk(sp, false/*popular_only*/); + g1h->heap_region_iterate(&blk); + + CompactPoint perm_cp(pg, NULL, NULL); + pg->prepare_for_compaction(&perm_cp); +} + +class G1AdjustPointersClosure: public HeapRegionClosure { + public: + bool doHeapRegion(HeapRegion* r) { + if (r->isHumongous()) { + if (r->startsHumongous()) { + // We must adjust the pointers on the single H object. + oop obj = oop(r->bottom()); + debug_only(GenMarkSweep::track_interior_pointers(obj)); + // point all the oops to the new location + obj->adjust_pointers(); + debug_only(GenMarkSweep::check_interior_pointers()); + } + } else { + // This really ought to be "as_CompactibleSpace"... + r->adjust_pointers(); + } + return false; + } +}; + +void G1MarkSweep::mark_sweep_phase3() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + // Adjust the pointers to reflect the new locations + EventMark m("3 adjust pointers"); + TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("3"); + + SharedHeap* sh = SharedHeap::heap(); + + sh->process_strong_roots(true, // Collecting permanent generation. + SharedHeap::SO_AllClasses, + &GenMarkSweep::adjust_root_pointer_closure, + &GenMarkSweep::adjust_pointer_closure); + + g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure); + + // Now adjust pointers in remaining weak roots. (All of which should + // have been cleared if they pointed to non-surviving objects.) + g1h->g1_process_weak_roots(&GenMarkSweep::adjust_root_pointer_closure, + &GenMarkSweep::adjust_pointer_closure); + + GenMarkSweep::adjust_marks(); + + G1AdjustPointersClosure blk; + g1h->heap_region_iterate(&blk); + pg->adjust_pointers(); +} + +class G1SpaceCompactClosure: public HeapRegionClosure { +public: + G1SpaceCompactClosure() {} + + bool doHeapRegion(HeapRegion* hr) { + if (hr->isHumongous()) { + if (hr->startsHumongous()) { + oop obj = oop(hr->bottom()); + if (obj->is_gc_marked()) { + obj->init_mark(); + } else { + assert(hr->is_empty(), "Should have been cleared in phase 2."); + } + hr->reset_during_compaction(); + } + } else { + hr->compact(); + } + return false; + } +}; + +void G1MarkSweep::mark_sweep_phase4() { + // All pointers are now adjusted, move objects accordingly + + // It is imperative that we traverse perm_gen first in phase4. All + // classes must be allocated earlier than their instances, and traversing + // perm_gen first makes sure that all klassOops have moved to their new + // location before any instance does a dispatch through it's klass! + + // The ValidateMarkSweep live oops tracking expects us to traverse spaces + // in the same order in phase2, phase3 and phase4. We don't quite do that + // here (perm_gen first rather than last), so we tell the validate code + // to use a higher index (saved from phase2) when verifying perm_gen. 
+ G1CollectedHeap* g1h = G1CollectedHeap::heap(); + Generation* pg = g1h->perm_gen(); + + EventMark m("4 compact heap"); + TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty); + GenMarkSweep::trace("4"); + + pg->compact(); + + G1SpaceCompactClosure blk; + g1h->heap_region_iterate(&blk); + +} + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1MarkSweep.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,57 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class ReferenceProcessor; + +// G1MarkSweep takes care of global mark-compact garbage collection for a +// G1CollectedHeap using a four-phase pointer forwarding algorithm. All +// generations are assumed to support marking; those that can also support +// compaction. +// +// Class unloading will only occur when a full gc is invoked. + + +class G1MarkSweep : AllStatic { + friend class VM_G1MarkSweep; + friend class Scavenge; + + public: + + static void invoke_at_safepoint(ReferenceProcessor* rp, + bool clear_all_softrefs); + + private: + + // Mark live objects + static void mark_sweep_phase1(bool& marked_for_deopt, + bool clear_all_softrefs); + // Calculate new addresses + static void mark_sweep_phase2(); + // Update pointers + static void mark_sweep_phase3(); + // Move objects to new positions + static void mark_sweep_phase4(); + + static void allocate_stacks(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1OopClosures.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,202 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class HeapRegion; +class G1CollectedHeap; +class G1RemSet; +class HRInto_G1RemSet; +class G1RemSet; +class ConcurrentMark; +class DirtyCardToOopClosure; +class CMBitMap; +class CMMarkStack; +class G1ParScanThreadState; + +// A class that scans oops in a given heap region (much as OopsInGenClosure +// scans oops in a generation.) +class OopsInHeapRegionClosure: public OopsInGenClosure { +protected: + HeapRegion* _from; +public: + virtual void set_region(HeapRegion* from) { _from = from; } +}; + + +class G1ScanAndBalanceClosure : public OopClosure { + G1CollectedHeap* _g1; + static int _nq; +public: + G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { } + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } +}; + +class G1ParClosureSuper : public OopsInHeapRegionClosure { +protected: + G1CollectedHeap* _g1; + G1RemSet* _g1_rem; + ConcurrentMark* _cm; + G1ParScanThreadState* _par_scan_state; +public: + G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state); + bool apply_to_weak_ref_discovered_field() { return true; } +}; + +class G1ParScanClosure : public G1ParClosureSuper { +public: + G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + G1ParClosureSuper(g1, par_scan_state) { } + void do_oop_nv(oop* p); // should be made inline + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + +#define G1_PARTIAL_ARRAY_MASK 1 + +class G1ParScanPartialArrayClosure : public G1ParClosureSuper { + G1ParScanClosure _scanner; + template void process_array_chunk(oop obj, int start, int end); +public: + G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { } + void do_oop_nv(oop* p); + void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) { do_oop_nv(p); } +}; + + +class G1ParCopyHelper : public G1ParClosureSuper { + G1ParScanClosure *_scanner; +protected: + void mark_forwardee(oop* p); + oop copy_to_survivor_space(oop obj); +public: + G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, + G1ParScanClosure *scanner) : + G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { } +}; + +template +class G1ParCopyClosure : public G1ParCopyHelper { + G1ParScanClosure _scanner; + void do_oop_work(oop* p); + void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); } +public: + G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : + _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { } + inline void do_oop_nv(oop* p) { + do_oop_work(p); + if (do_mark_forwardee) + mark_forwardee(p); + } + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p) { do_oop_nv(p); } + virtual void do_oop(narrowOop* p) 
{ do_oop_nv(p); } +}; + +typedef G1ParCopyClosure G1ParScanExtRootClosure; +typedef G1ParCopyClosure G1ParScanPermClosure; +typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; +typedef G1ParCopyClosure G1ParScanAndMarkPermClosure; +typedef G1ParCopyClosure G1ParScanHeapRSClosure; +typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; +typedef G1ParCopyClosure G1ParScanHeapEvacClosure; + + +class FilterIntoCSClosure: public OopClosure { + G1CollectedHeap* _g1; + OopClosure* _oc; + DirtyCardToOopClosure* _dcto_cl; +public: + FilterIntoCSClosure( DirtyCardToOopClosure* dcto_cl, + G1CollectedHeap* g1, OopClosure* oc) : + _dcto_cl(dcto_cl), _g1(g1), _oc(oc) + {} + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } +}; + +class FilterInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure { + G1CollectedHeap* _g1; + OopsInHeapRegionClosure* _oc; +public: + FilterInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1, + OopsInHeapRegionClosure* oc) : + _g1(g1), _oc(oc) + {} + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } + void set_region(HeapRegion* from) { + _oc->set_region(from); + } +}; + +class FilterAndMarkInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure { + G1CollectedHeap* _g1; + ConcurrentMark* _cm; + OopsInHeapRegionClosure* _oc; +public: + FilterAndMarkInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1, + OopsInHeapRegionClosure* oc, + ConcurrentMark* cm) + : _g1(g1), _oc(oc), _cm(cm) { } + + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } + void set_region(HeapRegion* from) { + _oc->set_region(from); + } +}; + +class FilterOutOfRegionClosure: public OopClosure { + HeapWord* _r_bottom; + HeapWord* _r_end; + OopClosure* _oc; + int _out_of_region; +public: + FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc); + inline void do_oop_nv(oop* p); + inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); } + virtual void do_oop(oop* p); + virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); } + bool apply_to_weak_ref_discovered_field() { return true; } + bool do_header() { return false; } + int out_of_region() { return _out_of_region; } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,112 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/* + * This really ought to be an inline function, but apparently the C++ + * compiler sometimes sees fit to ignore inline declarations. Sigh. + */ + +// This must a ifdef'ed because the counting it controls is in a +// perf-critical inner loop. +#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0 + +inline void FilterIntoCSClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL && _g1->obj_in_cs(obj)) { + _oc->do_oop(p); +#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT + _dcto_cl->incr_count(); +#endif + } +} + +inline void FilterIntoCSClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + +#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0 + +inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) { + oop obj = *p; + HeapWord* obj_hw = (HeapWord*)obj; + if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) { + _oc->do_oop(p); +#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT + _out_of_region++; +#endif + } +} + +inline void FilterOutOfRegionClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + +inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL && _g1->obj_in_cs(obj)) + _oc->do_oop(p); +} + +inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + + +inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) { + oop obj = *p; + if (obj != NULL) { + HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj); + if (hr != NULL) { + if (hr->in_collection_set()) + _oc->do_oop(p); + else if (!hr->is_young()) + _cm->grayRoot(obj); + } + } +} + +inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p) +{ + do_oop_nv(p); +} + +inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) { + RefToScanQueue* q; + if (ParallelGCThreads > 0) { + // Deal the work out equally. + _nq = (_nq + 1) % ParallelGCThreads; + q = _g1->task_queue(_nq); + } else { + q = _g1->task_queue(0); + } + bool nooverflow = q->push(p); + guarantee(nooverflow, "Overflow during poplularity region processing"); +} + +inline void G1ScanAndBalanceClosure::do_oop(oop* p) { + do_oop_nv(p); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1003 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1RemSet.cpp.incl" + +#define CARD_REPEAT_HISTO 0 + +#if CARD_REPEAT_HISTO +static size_t ct_freq_sz; +static jbyte* ct_freq = NULL; + +void init_ct_freq_table(size_t heap_sz_bytes) { + if (ct_freq == NULL) { + ct_freq_sz = heap_sz_bytes/CardTableModRefBS::card_size; + ct_freq = new jbyte[ct_freq_sz]; + for (size_t j = 0; j < ct_freq_sz; j++) ct_freq[j] = 0; + } +} + +void ct_freq_note_card(size_t index) { + assert(0 <= index && index < ct_freq_sz, "Bounds error."); + if (ct_freq[index] < 100) { ct_freq[index]++; } +} + +static IntHistogram card_repeat_count(10, 10); + +void ct_freq_update_histo_and_reset() { + for (size_t j = 0; j < ct_freq_sz; j++) { + card_repeat_count.add_entry(ct_freq[j]); + ct_freq[j] = 0; + } + +} +#endif + + +class IntoCSOopClosure: public OopsInHeapRegionClosure { + OopsInHeapRegionClosure* _blk; + G1CollectedHeap* _g1; +public: + IntoCSOopClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) : + _g1(g1), _blk(blk) {} + void set_region(HeapRegion* from) { + _blk->set_region(from); + } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + if (_g1->obj_in_cs(obj)) _blk->do_oop(p); + } + bool apply_to_weak_ref_discovered_field() { return true; } + bool idempotent() { return true; } +}; + +class IntoCSRegionClosure: public HeapRegionClosure { + IntoCSOopClosure _blk; + G1CollectedHeap* _g1; +public: + IntoCSRegionClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* blk) : + _g1(g1), _blk(g1, blk) {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set()) { + _blk.set_region(r); + if (r->isHumongous()) { + if (r->startsHumongous()) { + oop obj = oop(r->bottom()); + obj->oop_iterate(&_blk); + } + } else { + r->oop_before_save_marks_iterate(&_blk); + } + } + return false; + } +}; + +void +StupidG1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, + int worker_i) { + IntoCSRegionClosure rc(_g1, oc); + _g1->heap_region_iterate(&rc); +} + +class UpdateRSOopClosure: public OopClosure { + HeapRegion* _from; + HRInto_G1RemSet* _rs; + int _worker_i; +public: + UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) : + _from(NULL), _rs(rs), _worker_i(worker_i) { + guarantee(_rs != NULL, "Requires an HRIntoG1RemSet"); + } + + void set_from(HeapRegion* from) { + assert(from != NULL, "from region must be non-NULL"); + _from = from; + } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + assert(_from != NULL, "from region must be non-NULL"); + _rs->par_write_ref(_from, p, _worker_i); + } + // Override: this closure is idempotent. 
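// (That is, applying it more than once to the same reference should be harmless:
// par_write_ref would just record the same reference in the remembered set again.)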
+ // bool idempotent() { return true; } + bool apply_to_weak_ref_discovered_field() { return true; } +}; + +class UpdateRSOutOfRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + ModRefBarrierSet* _mr_bs; + UpdateRSOopClosure _cl; + int _worker_i; +public: + UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : + _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i), + _mr_bs(g1->mr_bs()), + _worker_i(worker_i), + _g1h(g1) + {} + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set() && !r->continuesHumongous()) { + _cl.set_from(r); + r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind); + _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true); + } + return false; + } +}; + +class VerifyRSCleanCardOopClosure: public OopClosure { + G1CollectedHeap* _g1; +public: + VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {} + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + HeapRegion* to = _g1->heap_region_containing(obj); + guarantee(to == NULL || !to->in_collection_set(), + "Missed a rem set member."); + } +}; + +HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) + : G1RemSet(g1), _ct_bs(ct_bs), _g1p(_g1->g1_policy()), + _cg1r(g1->concurrent_g1_refine()), + _par_traversal_in_progress(false), _new_refs(NULL), + _cards_scanned(NULL), _total_cards_scanned(0) +{ + _seq_task = new SubTasksDone(NumSeqTasks); + _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); +} + +HRInto_G1RemSet::~HRInto_G1RemSet() { + delete _seq_task; +} + +void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { + if (_g1->is_in_g1_reserved(mr.start())) { + _n += (int) ((mr.byte_size() / CardTableModRefBS::card_size)); + if (_start_first == NULL) _start_first = mr.start(); + } +} + +class ScanRSClosure : public HeapRegionClosure { + size_t _cards_done, _cards; + G1CollectedHeap* _g1h; + OopsInHeapRegionClosure* _oc; + G1BlockOffsetSharedArray* _bot_shared; + CardTableModRefBS *_ct_bs; + int _worker_i; + bool _try_claimed; +public: + ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) : + _oc(oc), + _cards(0), + _cards_done(0), + _worker_i(worker_i), + _try_claimed(false) + { + _g1h = G1CollectedHeap::heap(); + _bot_shared = _g1h->bot_shared(); + _ct_bs = (CardTableModRefBS*) (_g1h->barrier_set()); + } + + void set_try_claimed() { _try_claimed = true; } + + void scanCard(size_t index, HeapRegion *r) { + _cards_done++; + DirtyCardToOopClosure* cl = + r->new_dcto_closure(_oc, + CardTableModRefBS::Precise, + HeapRegionDCTOC::IntoCSFilterKind); + + // Set the "from" region in the closure. + _oc->set_region(r); + HeapWord* card_start = _bot_shared->address_for_index(index); + HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words; + Space *sp = SharedHeap::heap()->space_containing(card_start); + MemRegion sm_region; + if (ParallelGCThreads > 0) { + // first find the used area + sm_region = sp->used_region_at_save_marks(); + } else { + // The closure is not idempotent. We shouldn't look at objects + // allocated during the GC. 
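// used_region_at_save_marks() only covers the space up to the saved-marks
// watermark, so objects allocated here during the current GC fall outside the
// region being scanned.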
+ sm_region = sp->used_region_at_save_marks(); + } + MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end)); + if (!mr.is_empty()) { + cl->do_MemRegion(mr); + } + } + + void printCard(HeapRegion* card_region, size_t card_index, + HeapWord* card_start) { + gclog_or_tty->print_cr("T %d Region [" PTR_FORMAT ", " PTR_FORMAT ") " + "RS names card %p: " + "[" PTR_FORMAT ", " PTR_FORMAT ")", + _worker_i, + card_region->bottom(), card_region->end(), + card_index, + card_start, card_start + G1BlockOffsetSharedArray::N_words); + } + + bool doHeapRegion(HeapRegion* r) { + assert(r->in_collection_set(), "should only be called on elements of CS."); + HeapRegionRemSet* hrrs = r->rem_set(); + if (hrrs->iter_is_complete()) return false; // All done. + if (!_try_claimed && !hrrs->claim_iter()) return false; + // If we didn't return above, then + // _try_claimed || r->claim_iter() + // is true: either we're supposed to work on claimed-but-not-complete + // regions, or we successfully claimed the region. + HeapRegionRemSetIterator* iter = _g1h->rem_set_iterator(_worker_i); + hrrs->init_iterator(iter); + size_t card_index; + while (iter->has_next(card_index)) { + HeapWord* card_start = _g1h->bot_shared()->address_for_index(card_index); + +#if 0 + gclog_or_tty->print("Rem set iteration yielded card [" PTR_FORMAT ", " PTR_FORMAT ").\n", + card_start, card_start + CardTableModRefBS::card_size_in_words); +#endif + + HeapRegion* card_region = _g1h->heap_region_containing(card_start); + assert(card_region != NULL, "Yielding cards not in the heap?"); + _cards++; + + if (!card_region->in_collection_set()) { + // If the card is dirty, then we will scan it during updateRS. + if (!_ct_bs->is_card_claimed(card_index) && + !_ct_bs->is_card_dirty(card_index)) { + assert(_ct_bs->is_card_clean(card_index) || + _ct_bs->is_card_claimed(card_index), + "Card is either dirty, clean, or claimed"); + if (_ct_bs->claim_card(card_index)) + scanCard(card_index, card_region); + } + } + } + hrrs->set_iter_complete(); + return false; + } + // Set all cards back to clean. + void cleanup() {_g1h->cleanUpCardTable();} + size_t cards_done() { return _cards_done;} + size_t cards_looked_up() { return _cards;} +}; + +// We want the parallel threads to start their scanning at +// different collection set regions to avoid contention. 
+// If we have: +// n collection set regions +// p threads +// Then thread t will start at region t * floor (n/p) + +HeapRegion* HRInto_G1RemSet::calculateStartRegion(int worker_i) { + HeapRegion* result = _g1p->collection_set(); + if (ParallelGCThreads > 0) { + size_t cs_size = _g1p->collection_set_size(); + int n_workers = _g1->workers()->total_workers(); + size_t cs_spans = cs_size / n_workers; + size_t ind = cs_spans * worker_i; + for (size_t i = 0; i < ind; i++) + result = result->next_in_collection_set(); + } + return result; +} + +void HRInto_G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { + double rs_time_start = os::elapsedTime(); + HeapRegion *startRegion = calculateStartRegion(worker_i); + + BufferingOopsInHeapRegionClosure boc(oc); + ScanRSClosure scanRScl(&boc, worker_i); + _g1->collection_set_iterate_from(startRegion, &scanRScl); + scanRScl.set_try_claimed(); + _g1->collection_set_iterate_from(startRegion, &scanRScl); + + boc.done(); + double closure_app_time_sec = boc.closure_app_seconds(); + double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - + closure_app_time_sec; + double closure_app_time_ms = closure_app_time_sec * 1000.0; + + assert( _cards_scanned != NULL, "invariant" ); + _cards_scanned[worker_i] = scanRScl.cards_done(); + + _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); + _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); + if (ParallelGCThreads > 0) { + // In this case, we called scanNewRefsRS and recorded the corresponding + // time. + double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); + if (scan_new_refs_time_ms > 0.0) { + closure_app_time_ms += scan_new_refs_time_ms; + } + } + _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); +} + +void HRInto_G1RemSet::updateRS(int worker_i) { + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + + double start = os::elapsedTime(); + _g1p->record_update_rs_start_time(worker_i, start * 1000.0); + + if (G1RSBarrierUseQueue && !cg1r->do_traversal()) { + // Apply the appropriate closure to all remaining log entries. + _g1->iterate_dirty_card_closure(false, worker_i); + // Now there should be no dirty cards. + if (G1RSLogCheckCardTable) { + CountNonCleanMemRegionClosure cl(_g1); + _ct_bs->mod_card_iterate(&cl); + // XXX This isn't true any more: keeping cards of young regions + // marked dirty broke it. Need some reasonable fix. + guarantee(cl.n() == 0, "Card table should be clean."); + } + } else { + UpdateRSOutOfRegionClosure update_rs(_g1, worker_i); + _g1->heap_region_iterate(&update_rs); + // We did a traversal; no further one is necessary. 
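// [Illustrative sketch, not part of this changeset.] The start-region
// partitioning computed by calculateStartRegion() above is just
// "worker t begins at region t * floor(n / p)" over the collection-set list.
// A minimal stand-alone model of that arithmetic in plain standard C++;
// the names below are hypothetical, not HotSpot APIs.
#include <cstdio>
#include <cstddef>

// Index of the first collection-set region worker `t` scans, given `n`
// regions in the collection set and `p` workers.
static size_t start_region_index(size_t t, size_t n, size_t p) {
  return t * (n / p);  // integer division is floor(n / p)
}

int main() {
  // Example: 10 regions, 4 workers -> workers start at regions 0, 2, 4 and 6;
  // each worker then walks the region list from its start point, claiming
  // regions as it goes, so the workers mostly touch disjoint regions first.
  for (size_t t = 0; t < 4; ++t) {
    std::printf("worker %zu starts at region %zu\n",
                t, start_region_index(t, 10, 4));
  }
  return 0;
}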
+ if (G1RSBarrierUseQueue) { + assert(cg1r->do_traversal(), "Or we shouldn't have gotten here."); + cg1r->set_pya_cancel(); + } + if (_cg1r->use_cache()) { + _cg1r->clear_and_record_card_counts(); + _cg1r->clear_hot_cache(); + } + } + _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); +} + +#ifndef PRODUCT +class PrintRSClosure : public HeapRegionClosure { + int _count; +public: + PrintRSClosure() : _count(0) {} + bool doHeapRegion(HeapRegion* r) { + HeapRegionRemSet* hrrs = r->rem_set(); + _count += (int) hrrs->occupied(); + if (hrrs->occupied() == 0) { + gclog_or_tty->print("Heap Region [" PTR_FORMAT ", " PTR_FORMAT ") " + "has no remset entries\n", + r->bottom(), r->end()); + } else { + gclog_or_tty->print("Printing rem set for heap region [" PTR_FORMAT ", " PTR_FORMAT ")\n", + r->bottom(), r->end()); + r->print(); + hrrs->print(); + gclog_or_tty->print("\nDone printing rem set\n"); + } + return false; + } + int occupied() {return _count;} +}; +#endif + +class CountRSSizeClosure: public HeapRegionClosure { + size_t _n; + size_t _tot; + size_t _max; + HeapRegion* _max_r; + enum { + N = 20, + MIN = 6 + }; + int _histo[N]; +public: + CountRSSizeClosure() : _n(0), _tot(0), _max(0), _max_r(NULL) { + for (int i = 0; i < N; i++) _histo[i] = 0; + } + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + size_t occ = r->rem_set()->occupied(); + _n++; + _tot += occ; + if (occ > _max) { + _max = occ; + _max_r = r; + } + // Fit it into a histo bin. + int s = 1 << MIN; + int i = 0; + while (occ > (size_t) s && i < (N-1)) { + s = s << 1; + i++; + } + _histo[i]++; + } + return false; + } + size_t n() { return _n; } + size_t tot() { return _tot; } + size_t mx() { return _max; } + HeapRegion* mxr() { return _max_r; } + void print_histo() { + int mx = N; + while (mx >= 0) { + if (_histo[mx-1] > 0) break; + mx--; + } + gclog_or_tty->print_cr("Number of regions with given RS sizes:"); + gclog_or_tty->print_cr(" <= %8d %8d", 1 << MIN, _histo[0]); + for (int i = 1; i < mx-1; i++) { + gclog_or_tty->print_cr(" %8d - %8d %8d", + (1 << (MIN + i - 1)) + 1, + 1 << (MIN + i), + _histo[i]); + } + gclog_or_tty->print_cr(" > %8d %8d", (1 << (MIN+mx-2))+1, _histo[mx-1]); + } +}; + +void +HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc, + int worker_i) { + double scan_new_refs_start_sec = os::elapsedTime(); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set()); + while (_new_refs[worker_i]->is_nonempty()) { + oop* p = _new_refs[worker_i]->pop(); + oop obj = *p; + // *p was in the collection set when p was pushed on "_new_refs", but + // another thread may have processed this location from an RS, so it + // might not point into the CS any longer. If so, it's obviously been + // processed, and we don't need to do anything further. + if (g1h->obj_in_cs(obj)) { + HeapRegion* r = g1h->heap_region_containing(p); + + DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj)); + assert(ParallelGCThreads > 1 + || to->rem_set()->contains_reference(p), + "Invariant: pushed after being added." + "(Not reliable in parallel code.)"); + oc->set_region(r); + // If "p" has already been processed concurrently, this is + // idempotent. 
+ oc->do_oop(p); + } + } + _g1p->record_scan_new_refs_time(worker_i, + (os::elapsedTime() - scan_new_refs_start_sec) + * 1000.0); +} + +void HRInto_G1RemSet::set_par_traversal(bool b) { + _par_traversal_in_progress = b; + HeapRegionRemSet::set_par_traversal(b); +} + +void HRInto_G1RemSet::cleanupHRRS() { + HeapRegionRemSet::cleanup(); +} + +void +HRInto_G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc, + int worker_i) { +#if CARD_REPEAT_HISTO + ct_freq_update_histo_and_reset(); +#endif + if (worker_i == 0) { + _cg1r->clear_and_record_card_counts(); + } + + // Make this into a command-line flag... + if (G1RSCountHisto && (ParallelGCThreads == 0 || worker_i == 0)) { + CountRSSizeClosure count_cl; + _g1->heap_region_iterate(&count_cl); + gclog_or_tty->print_cr("Avg of %d RS counts is %f, max is %d, " + "max region is " PTR_FORMAT, + count_cl.n(), (float)count_cl.tot()/(float)count_cl.n(), + count_cl.mx(), count_cl.mxr()); + count_cl.print_histo(); + } + + if (ParallelGCThreads > 0) { + // This is a temporary change to serialize the update and scanning + // of remembered sets. There are some race conditions when this is + // done in parallel and they are causing failures. When we resolve + // said race conditions, we'll revert back to parallel remembered + // set updating and scanning. See CRs 6677707 and 6677708. + if (worker_i == 0) { + updateRS(worker_i); + scanNewRefsRS(oc, worker_i); + scanRS(oc, worker_i); + } + } else { + assert(worker_i == 0, "invariant"); + + updateRS(0); + scanRS(oc, 0); + } +} + +void HRInto_G1RemSet:: +prepare_for_oops_into_collection_set_do() { +#if G1_REM_SET_LOGGING + PrintRSClosure cl; + _g1->collection_set_iterate(&cl); +#endif + cleanupHRRS(); + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + _g1->set_refine_cte_cl_concurrency(false); + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.concatenate_logs(); + + assert(!_par_traversal_in_progress, "Invariant between iterations."); + if (ParallelGCThreads > 0) { + set_par_traversal(true); + int n_workers = _g1->workers()->total_workers(); + _seq_task->set_par_threads(n_workers); + for (uint i = 0; i < ParallelGCThreads; i++) + _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); + + if (cg1r->do_traversal()) { + updateRS(0); + // Have to do this again after updaters + cleanupHRRS(); + } + } + guarantee( _cards_scanned == NULL, "invariant" ); + _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); + _total_cards_scanned = 0; +} + + +class cleanUpIteratorsClosure : public HeapRegionClosure { + bool doHeapRegion(HeapRegion *r) { + HeapRegionRemSet* hrrs = r->rem_set(); + hrrs->init_for_par_iteration(); + return false; + } +}; + +void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { + guarantee( _cards_scanned != NULL, "invariant" ); + _total_cards_scanned = 0; + for (uint i = 0; i < n_workers(); ++i) + _total_cards_scanned += _cards_scanned[i]; + FREE_C_HEAP_ARRAY(size_t, _cards_scanned); + _cards_scanned = NULL; + // Cleanup after copy +#if G1_REM_SET_LOGGING + PrintRSClosure cl; + _g1->heap_region_iterate(&cl); +#endif + _g1->set_refine_cte_cl_concurrency(true); + cleanUpIteratorsClosure iterClosure; + _g1->collection_set_iterate(&iterClosure); + // Set all cards back to clean. 
+ _g1->cleanUpCardTable(); + if (ParallelGCThreads > 0) { + ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); + if (cg1r->do_traversal()) { + cg1r->cg1rThread()->set_do_traversal(false); + } + for (uint i = 0; i < ParallelGCThreads; i++) { + delete _new_refs[i]; + } + set_par_traversal(false); + } + assert(!_par_traversal_in_progress, "Invariant between iterations."); +} + +class UpdateRSObjectClosure: public ObjectClosure { + UpdateRSOopClosure* _update_rs_oop_cl; +public: + UpdateRSObjectClosure(UpdateRSOopClosure* update_rs_oop_cl) : + _update_rs_oop_cl(update_rs_oop_cl) {} + void do_object(oop obj) { + obj->oop_iterate(_update_rs_oop_cl); + } + +}; + +class ScrubRSClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + BitMap* _region_bm; + BitMap* _card_bm; + CardTableModRefBS* _ctbs; +public: + ScrubRSClosure(BitMap* region_bm, BitMap* card_bm) : + _g1h(G1CollectedHeap::heap()), + _region_bm(region_bm), _card_bm(card_bm), + _ctbs(NULL) + { + ModRefBarrierSet* bs = _g1h->mr_bs(); + guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); + _ctbs = (CardTableModRefBS*)bs; + } + + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + r->rem_set()->scrub(_ctbs, _region_bm, _card_bm); + } + return false; + } +}; + +void HRInto_G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) { + ScrubRSClosure scrub_cl(region_bm, card_bm); + _g1->heap_region_iterate(&scrub_cl); +} + +void HRInto_G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, + int worker_num, int claim_val) { + ScrubRSClosure scrub_cl(region_bm, card_bm); + _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val); +} + + +class ConcRefineRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + CardTableModRefBS* _ctbs; + ConcurrentGCThread* _cgc_thrd; + ConcurrentG1Refine* _cg1r; + unsigned _cards_processed; + UpdateRSOopClosure _update_rs_oop_cl; +public: + ConcRefineRegionClosure(CardTableModRefBS* ctbs, + ConcurrentG1Refine* cg1r, + HRInto_G1RemSet* g1rs) : + _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()), + _update_rs_oop_cl(g1rs), _cards_processed(0), + _g1h(G1CollectedHeap::heap()) + {} + + bool doHeapRegion(HeapRegion* r) { + if (!r->in_collection_set() && + !r->continuesHumongous() && + !r->is_young()) { + _update_rs_oop_cl.set_from(r); + UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); + + // For each run of dirty card in the region: + // 1) Clear the cards. + // 2) Process the range corresponding to the run, adding any + // necessary RS entries. + // 1 must precede 2, so that a concurrent modification redirties the + // card. If a processing attempt does not succeed, because it runs + // into an unparseable region, we will do binary search to find the + // beginning of the next parseable region. 
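// [Illustrative sketch, not part of this changeset.] A toy, single-threaded
// model of the ordering the comment above requires: each run of dirty cards
// is reset to clean *before* its range is processed, so a concurrent write
// that lands mid-scan re-dirties the card and is picked up by a later pass
// instead of being lost. Plain standard C++; the card table here is just a
// byte vector and all names are hypothetical, not HotSpot code.
#include <cstdio>
#include <cstdint>
#include <utility>
#include <vector>

static const uint8_t kClean = 0;
static const uint8_t kDirty = 1;

// Find the next run of dirty cards at or after `from`, reset it to clean,
// and return it as [begin, end); begin == end means no dirty cards remain.
static std::pair<size_t, size_t>
claim_dirty_run(std::vector<uint8_t>& cards, size_t from) {
  size_t begin = from;
  while (begin < cards.size() && cards[begin] != kDirty) ++begin;
  size_t end = begin;
  while (end < cards.size() && cards[end] == kDirty) {
    cards[end] = kClean;  // step 1: clear before processing
    ++end;
  }
  return std::make_pair(begin, end);
}

int main() {
  std::vector<uint8_t> cards = {0, 1, 1, 0, 0, 1, 0, 1, 1, 1};
  size_t next = 0;
  for (;;) {
    std::pair<size_t, size_t> run = claim_dirty_run(cards, next);
    if (run.first == run.second) break;  // nothing left to do
    // step 2: process the heap range covered by the run (here, just report).
    std::printf("processing cards [%zu, %zu)\n", run.first, run.second);
    next = run.second;
  }
  return 0;
}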
+ HeapWord* startAddr = r->bottom(); + HeapWord* endAddr = r->used_region().end(); + HeapWord* lastAddr; + HeapWord* nextAddr; + + for (nextAddr = lastAddr = startAddr; + nextAddr < endAddr; + nextAddr = lastAddr) { + MemRegion dirtyRegion; + + // Get and clear dirty region from card table + MemRegion next_mr(nextAddr, endAddr); + dirtyRegion = + _ctbs->dirty_card_range_after_reset( + next_mr, + true, CardTableModRefBS::clean_card_val()); + assert(dirtyRegion.start() >= nextAddr, + "returned region inconsistent?"); + + if (!dirtyRegion.is_empty()) { + HeapWord* stop_point = + r->object_iterate_mem_careful(dirtyRegion, + &update_rs_obj_cl); + if (stop_point == NULL) { + lastAddr = dirtyRegion.end(); + _cards_processed += + (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words); + } else { + // We're going to skip one or more cards that we can't parse. + HeapWord* next_parseable_card = + r->next_block_start_careful(stop_point); + // Round this up to a card boundary. + next_parseable_card = + _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card)); + // Now we invalidate the intervening cards so we'll see them + // again. + MemRegion remaining_dirty = + MemRegion(stop_point, dirtyRegion.end()); + MemRegion skipped = + MemRegion(stop_point, next_parseable_card); + _ctbs->invalidate(skipped.intersection(remaining_dirty)); + + // Now start up again where we can parse. + lastAddr = next_parseable_card; + + // Count how many we did completely. + _cards_processed += + (stop_point - dirtyRegion.start()) / + CardTableModRefBS::card_size_in_words; + } + // Allow interruption at regular intervals. + // (Might need to make them more regular, if we get big + // dirty regions.) + if (_cgc_thrd != NULL) { + if (_cgc_thrd->should_yield()) { + _cgc_thrd->yield(); + switch (_cg1r->get_pya()) { + case PYA_continue: + // This may have changed: re-read. + endAddr = r->used_region().end(); + continue; + case PYA_restart: case PYA_cancel: + return true; + } + } + } + } else { + break; + } + } + } + // A good yield opportunity. + if (_cgc_thrd != NULL) { + if (_cgc_thrd->should_yield()) { + _cgc_thrd->yield(); + switch (_cg1r->get_pya()) { + case PYA_restart: case PYA_cancel: + return true; + default: + break; + } + + } + } + return false; + } + + unsigned cards_processed() { return _cards_processed; } +}; + + +void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) { + ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this); + _g1->heap_region_iterate(&cr_cl); + _conc_refine_traversals++; + _conc_refine_cards += cr_cl.cards_processed(); +} + +static IntHistogram out_of_histo(50, 50); + + + +void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { + // If the card is no longer dirty, nothing to do. + if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; + + // Construct the region representing the card. + HeapWord* start = _ct_bs->addr_for(card_ptr); + // And find the region containing it. + HeapRegion* r = _g1->heap_region_containing(start); + if (r == NULL) { + guarantee(_g1->is_in_permanent(start), "Or else where?"); + return; // Not in the G1 heap (might be in perm, for example.) + } + // Why do we have to check here whether a card is on a young region, + // given that we dirty young regions and, as a result, the + // post-barrier is supposed to filter them out and never to enqueue + // them? 
When we allocate a new region as the "allocation region" we
+  // actually dirty its cards after we release the lock, since card
+  // dirtying while holding the lock was a performance bottleneck. So,
+  // as a result, it is possible for other threads to actually
+  // allocate objects in the region (after they acquire the lock)
+  // before all the cards on the region are dirtied. This is unlikely,
+  // and it doesn't happen often, but it can happen. So, the extra
+  // check below filters out those cards.
+  if (r->is_young()) {
+    return;
+  }
+  // While we are processing RSet buffers during the collection, we
+  // actually don't want to scan any cards on the collection set,
+  // since we don't want to update remembered sets with entries that
+  // point into the collection set, given that live objects from the
+  // collection set are about to move and such entries will be stale
+  // very soon. This change also deals with a reliability issue which
+  // involves scanning a card in the collection set and coming across
+  // an array that was being chunked and looking malformed. Note,
+  // however, that if evacuation fails, we have to scan any objects
+  // that were not moved and create any missing entries.
+  if (r->in_collection_set()) {
+    return;
+  }
+
+  // Should we defer it?
+  if (_cg1r->use_cache()) {
+    card_ptr = _cg1r->cache_insert(card_ptr);
+    // If it was not an eviction, nothing to do.
+    if (card_ptr == NULL) return;
+
+    // OK, we have to reset the card start, region, etc.
+    start = _ct_bs->addr_for(card_ptr);
+    r = _g1->heap_region_containing(start);
+    if (r == NULL) {
+      guarantee(_g1->is_in_permanent(start), "Or else where?");
+      return; // Not in the G1 heap (might be in perm, for example.)
+    }
+    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
+  }
+
+  HeapWord* end = _ct_bs->addr_for(card_ptr + 1);
+  MemRegion dirtyRegion(start, end);
+
+#if CARD_REPEAT_HISTO
+  init_ct_freq_table(_g1->g1_reserved_obj_bytes());
+  ct_freq_note_card(_ct_bs->index_for(start));
+#endif
+
+  UpdateRSOopClosure update_rs_oop_cl(this, worker_i);
+  update_rs_oop_cl.set_from(r);
+  FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl);
+
+  // Undirty the card.
+  *card_ptr = CardTableModRefBS::clean_card_val();
+  // We must complete this write before we do any of the reads below.
+  OrderAccess::storeload();
+  // And process it, being careful of unallocated portions of TLAB's.
+  HeapWord* stop_point =
+    r->oops_on_card_seq_iterate_careful(dirtyRegion,
+                                        &filter_then_update_rs_oop_cl);
+  // If stop_point is non-null, then we encountered an unallocated region
+  // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
+  // card and re-enqueue: if we put off the card until a GC pause, then the
+  // unallocated portion will be filled in. Alternatively, we might try
+  // the full complexity of the technique used in "regular" precleaning.
+  if (stop_point != NULL) {
+    // The card might have gotten re-dirtied and re-enqueued while we
+    // worked. (In fact, it's pretty likely.)
+ if (*card_ptr != CardTableModRefBS::dirty_card_val()) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + MutexLockerEx x(Shared_DirtyCardQ_lock, + Mutex::_no_safepoint_check_flag); + DirtyCardQueue* sdcq = + JavaThread::dirty_card_queue_set().shared_dirty_card_queue(); + sdcq->enqueue(card_ptr); + } + } else { + out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region()); + _conc_refine_cards++; + } +} + +class HRRSStatsIter: public HeapRegionClosure { + size_t _occupied; + size_t _total_mem_sz; + size_t _max_mem_sz; + HeapRegion* _max_mem_sz_region; +public: + HRRSStatsIter() : + _occupied(0), + _total_mem_sz(0), + _max_mem_sz(0), + _max_mem_sz_region(NULL) + {} + + bool doHeapRegion(HeapRegion* r) { + if (r->continuesHumongous()) return false; + size_t mem_sz = r->rem_set()->mem_size(); + if (mem_sz > _max_mem_sz) { + _max_mem_sz = mem_sz; + _max_mem_sz_region = r; + } + _total_mem_sz += mem_sz; + size_t occ = r->rem_set()->occupied(); + _occupied += occ; + return false; + } + size_t total_mem_sz() { return _total_mem_sz; } + size_t max_mem_sz() { return _max_mem_sz; } + size_t occupied() { return _occupied; } + HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } +}; + +void HRInto_G1RemSet::print_summary_info() { + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + ConcurrentG1RefineThread* cg1r_thrd = + g1->concurrent_g1_refine()->cg1rThread(); + +#if CARD_REPEAT_HISTO + gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); + gclog_or_tty->print_cr(" # of repeats --> # of cards with that number."); + card_repeat_count.print_on(gclog_or_tty); +#endif + + if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) { + gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: "); + gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number."); + out_of_histo.print_on(gclog_or_tty); + } + gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in " + "%5.2fs.", + _conc_refine_cards, cg1r_thrd->vtime_accum()); + + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + jint tot_processed_buffers = + dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); + gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); + gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.", + dcqs.processed_buffers_rs_thread(), + 100.0*(float)dcqs.processed_buffers_rs_thread()/ + (float)tot_processed_buffers); + gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.", + dcqs.processed_buffers_mut(), + 100.0*(float)dcqs.processed_buffers_mut()/ + (float)tot_processed_buffers); + gclog_or_tty->print_cr(" Did %d concurrent refinement traversals.", + _conc_refine_traversals); + if (!G1RSBarrierUseQueue) { + gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.", + _conc_refine_traversals > 0 ? + (float)_conc_refine_cards/(float)_conc_refine_traversals : + 0); + } + gclog_or_tty->print_cr(""); + if (G1UseHRIntoRS) { + HRRSStatsIter blk; + g1->heap_region_iterate(&blk); + gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K." 
+ " Max = " SIZE_FORMAT "K.", + blk.total_mem_sz()/K, blk.max_mem_sz()/K); + gclog_or_tty->print_cr(" Static structures = " SIZE_FORMAT "K," + " free_lists = " SIZE_FORMAT "K.", + HeapRegionRemSet::static_mem_size()/K, + HeapRegionRemSet::fl_mem_size()/K); + gclog_or_tty->print_cr(" %d occupied cards represented.", + blk.occupied()); + gclog_or_tty->print_cr(" Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )" + " %s, cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.", + blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(), + (blk.max_mem_sz_region()->popular() ? "POP" : ""), + (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K, + (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K); + gclog_or_tty->print_cr(" Did %d coarsenings.", + HeapRegionRemSet::n_coarsenings()); + + } +} +void HRInto_G1RemSet::prepare_for_verify() { + if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) { + cleanupHRRS(); + _g1->set_refine_cte_cl_concurrency(false); + if (SafepointSynchronize::is_at_safepoint()) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.concatenate_logs(); + } + bool cg1r_use_cache = _cg1r->use_cache(); + _cg1r->set_use_cache(false); + updateRS(0); + _cg1r->set_use_cache(cg1r_use_cache); + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1RemSet.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,216 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// A G1RemSet provides ways of iterating over pointers into a selected +// collection set. + +class G1CollectedHeap; +class CardTableModRefBarrierSet; +class HRInto_G1RemSet; +class ConcurrentG1Refine; + +class G1RemSet { +protected: + G1CollectedHeap* _g1; + + unsigned _conc_refine_traversals; + unsigned _conc_refine_cards; + + size_t n_workers(); + +public: + G1RemSet(G1CollectedHeap* g1) : + _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0) + {} + + // Invoke "blk->do_oop" on all pointers into the CS in object in regions + // outside the CS (having invoked "blk->set_region" to set the "from" + // region correctly beforehand.) The "worker_i" param is for the + // parallel case where the number of the worker thread calling this + // function can be helpful in partitioning the work to be done. It + // should be the same as the "i" passed to the calling thread's + // work(i) function. 
In the sequential case this param will be ignored.
+  virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                           int worker_i) = 0;
+
+  // Prepare for and cleanup after an oops_into_collection_set_do
+  // call. Must call each of these once before and after (in sequential
+  // code) any threads call oops into collection set do. (This offers an
+  // opportunity for sequential setup and teardown of structures needed by a
+  // parallel iteration over the CS's RS.)
+  virtual void prepare_for_oops_into_collection_set_do() = 0;
+  virtual void cleanup_after_oops_into_collection_set_do() = 0;
+
+  // If "this" is of the given subtype, return "this", else "NULL".
+  virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
+
+  // Record, if necessary, the fact that *p (where "p" is in region "from")
+  // has changed to its new value.
+  virtual void write_ref(HeapRegion* from, oop* p) = 0;
+  virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
+
+  // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
+  // or card, respectively, such that a region or card with a corresponding
+  // 0 bit contains no part of any live object. Eliminates any remembered
+  // set entries that correspond to dead heap ranges.
+  virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
+  // Like the above, but assumes it is called in parallel: "worker_num" is the
+  // parallel thread id of the current thread, and "claim_val" is the
+  // value that should be used to claim heap regions.
+  virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                         int worker_num, int claim_val) = 0;
+
+  // Do any "refinement" activity that might be appropriate to the given
+  // G1RemSet. If "refinement" has iterative "passes", do one pass.
+  // If "t" is non-NULL, it is the thread performing the refinement.
+  // Default implementation does nothing.
+  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
+
+  // Refine the card corresponding to "card_ptr". If "sts" is non-NULL,
+  // join and leave around parts that must be atomic wrt GC. (NULL means
+  // being done at a safepoint.)
+  virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
+
+  unsigned conc_refine_cards() { return _conc_refine_cards; }
+
+  // Print any relevant summary info.
+  virtual void print_summary_info() {}
+
+  // Prepare remembered set for verification.
+  virtual void prepare_for_verify() {};
+};
+
+
+// The simplest possible G1RemSet: iterates over all objects in non-CS
+// regions, searching for pointers into the CS.
+class StupidG1RemSet: public G1RemSet {
+public:
+  StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
+
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   int worker_i);
+
+  void prepare_for_oops_into_collection_set_do() {}
+  void cleanup_after_oops_into_collection_set_do() {}
+
+  // Nothing is necessary in the version below.
+  void write_ref(HeapRegion* from, oop* p) {}
+  void par_write_ref(HeapRegion* from, oop* p, int tid) {}
+
+  void scrub(BitMap* region_bm, BitMap* card_bm) {}
+  void scrub_par(BitMap* region_bm, BitMap* card_bm,
+                 int worker_num, int claim_val) {}
+
+};
+
+// A G1RemSet in which each heap region has a rem set that records the
+// external heap references into it. Uses a mod ref bs to track updates,
+// so that they can be used to update the individual region remsets.
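// [Illustrative sketch, not part of this changeset.] A toy model of the
// "each region has a rem set recording the external references into it"
// idea described above: on a cross-region pointer store, the address of the
// updated field is recorded in the *destination* region's set, so that at
// evacuation time only those fields need to be revisited. Plain standard
// C++ with hypothetical names; the real code uses HeapRegionRemSet, not
// std::set.
#include <cstdio>
#include <set>

struct ToyRegion {
  int id;
  std::set<const void*> incoming_fields;  // fields outside this region that point in
};

// Post-write-barrier model, called after "*field" is set to point into `to`.
static void record_reference(ToyRegion* from, ToyRegion* to, const void* field) {
  if (to != NULL && from != to) {  // same-region stores need no rem set entry
    to->incoming_fields.insert(field);
  }
}

int main() {
  ToyRegion a = {0, std::set<const void*>()};
  ToyRegion b = {1, std::set<const void*>()};
  int field_in_a = 0;  // stands in for a reference field located in region a
  record_reference(&a, &b, &field_in_a);  // that field now points into region b
  std::printf("region %d has %zu incoming field(s) recorded\n",
              b.id, b.incoming_fields.size());
  return 0;
}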
+ +class HRInto_G1RemSet: public G1RemSet { +protected: + enum SomePrivateConstants { + UpdateRStoMergeSync = 0, + MergeRStoDoDirtySync = 1, + DoDirtySync = 2, + LastSync = 3, + + SeqTask = 0, + NumSeqTasks = 1 + }; + + CardTableModRefBS* _ct_bs; + SubTasksDone* _seq_task; + G1CollectorPolicy* _g1p; + + ConcurrentG1Refine* _cg1r; + + size_t* _cards_scanned; + size_t _total_cards_scanned; + + // _par_traversal_in_progress is "true" iff a parallel traversal is in + // progress. If so, then cards added to remembered sets should also have + // their references into the collection summarized in "_new_refs". + bool _par_traversal_in_progress; + void set_par_traversal(bool b); + GrowableArray** _new_refs; + +public: + // This is called to reset dual hash tables after the gc pause + // is finished and the initial hash table is no longer being + // scanned. + void cleanupHRRS(); + + HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs); + ~HRInto_G1RemSet(); + + void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, + int worker_i); + + void prepare_for_oops_into_collection_set_do(); + void cleanup_after_oops_into_collection_set_do(); + void scanRS(OopsInHeapRegionClosure* oc, int worker_i); + void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i); + void updateRS(int worker_i); + HeapRegion* calculateStartRegion(int i); + + HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; } + + CardTableModRefBS* ct_bs() { return _ct_bs; } + size_t cardsScanned() { return _total_cards_scanned; } + + // Record, if necessary, the fact that *p (where "p" is in region "from", + // which is required to be non-NULL) has changed to a new non-NULL value. + inline void write_ref(HeapRegion* from, oop* p); + // The "_nv" version is the same; it exists just so that it is not virtual. + inline void write_ref_nv(HeapRegion* from, oop* p); + + inline bool self_forwarded(oop obj); + inline void par_write_ref(HeapRegion* from, oop* p, int tid); + + void scrub(BitMap* region_bm, BitMap* card_bm); + void scrub_par(BitMap* region_bm, BitMap* card_bm, + int worker_num, int claim_val); + + virtual void concurrentRefinementPass(ConcurrentG1Refine* t); + virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i); + + virtual void print_summary_info(); + virtual void prepare_for_verify(); +}; + +#define G1_REM_SET_LOGGING 0 + +class CountNonCleanMemRegionClosure: public MemRegionClosure { + G1CollectedHeap* _g1; + int _n; + HeapWord* _start_first; +public: + CountNonCleanMemRegionClosure(G1CollectedHeap* g1) : + _g1(g1), _n(0), _start_first(NULL) + {} + void do_MemRegion(MemRegion mr); + int n() { return _n; }; + HeapWord* start_first() { return _start_first; } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,104 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +inline size_t G1RemSet::n_workers() { + if (_g1->workers() != NULL) { + return _g1->workers()->total_workers(); + } else { + return 1; + } +} + +inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) { + oop obj = *p; + assert(from != NULL && from->is_in_reserved(p), + "p is not in a from"); + HeapRegion* to = _g1->heap_region_containing(obj); + if (from != to && to != NULL) { + if (!to->popular() && !from->is_survivor()) { +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" + " for region [" PTR_FORMAT ", " PTR_FORMAT ")", + p, obj, + to->bottom(), to->end()); +#endif + assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); + if (to->rem_set()->add_reference(p)) { + _g1->schedule_popular_region_evac(to); + } + } + } +} + +inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) { + write_ref_nv(from, p); +} + +inline bool HRInto_G1RemSet::self_forwarded(oop obj) { + bool result = (obj->is_forwarded() && (obj->forwardee()== obj)); + return result; +} + +inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) { + oop obj = *p; +#ifdef ASSERT + // can't do because of races + // assert(obj == NULL || obj->is_oop(), "expected an oop"); + + // Do the safe subset of is_oop + if (obj != NULL) { +#ifdef CHECK_UNHANDLED_OOPS + oopDesc* o = obj.obj(); +#else + oopDesc* o = obj; +#endif // CHECK_UNHANDLED_OOPS + assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned"); + assert(Universe::heap()->is_in_reserved(obj), "must be in heap"); + } +#endif // ASSERT + assert(from == NULL || from->is_in_reserved(p), + "p is not in from"); + HeapRegion* to = _g1->heap_region_containing(obj); + // The test below could be optimized by applying a bit op to to and from. + if (to != NULL && from != NULL && from != to) { + if (!to->popular() && !from->is_survivor()) { +#if G1_REM_SET_LOGGING + gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" + " for region [" PTR_FORMAT ", " PTR_FORMAT ")", + p, obj, + to->bottom(), to->end()); +#endif + assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); + if (to->rem_set()->add_reference(p, tid)) { + _g1->schedule_popular_region_evac(to); + } + } + // There is a tricky infinite loop if we keep pushing + // self forwarding pointers onto our _new_refs list. + if (_par_traversal_in_progress && + to->in_collection_set() && !self_forwarded(obj)) { + _new_refs[tid]->push(p); + } + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,150 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1SATBCardTableModRefBS.cpp.incl" + +G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap, + int max_covered_regions) : + CardTableModRefBSForCTRS(whole_heap, max_covered_regions) +{ + _kind = G1SATBCT; +} + + +void G1SATBCardTableModRefBS::enqueue(oop pre_val) { + if (!JavaThread::satb_mark_queue_set().active()) return; + Thread* thr = Thread::current(); + if (thr->is_Java_thread()) { + JavaThread* jt = (JavaThread*)thr; + jt->satb_mark_queue().enqueue(pre_val); + } else { + MutexLocker x(Shared_SATB_Q_lock); + JavaThread::satb_mark_queue_set().shared_satb_queue()->enqueue(pre_val); + } +} + +// When we know the current java thread: +void +G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field, + oop newVal, + JavaThread* jt) { + if (!JavaThread::satb_mark_queue_set().active()) return; + assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop"); + oop preVal = *(oop*)field; + if (preVal != NULL) { + jt->satb_mark_queue().enqueue(preVal); + } +} + +void +G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) { + if (!JavaThread::satb_mark_queue_set().active()) return; + assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop"); + oop* elem_ptr = (oop*)mr.start(); + while ((HeapWord*)elem_ptr < mr.end()) { + oop elem = *elem_ptr; + if (elem != NULL) enqueue(elem); + elem_ptr++; + } +} + + + +G1SATBCardTableLoggingModRefBS:: +G1SATBCardTableLoggingModRefBS(MemRegion whole_heap, + int max_covered_regions) : + G1SATBCardTableModRefBS(whole_heap, max_covered_regions), + _dcqs(JavaThread::dirty_card_queue_set()) +{ + _kind = G1SATBCTLogging; +} + +void +G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field, + oop new_val) { + jbyte* byte = byte_for(field); + if (*byte != dirty_card) { + *byte = dirty_card; + Thread* thr = Thread::current(); + if (thr->is_Java_thread()) { + JavaThread* jt = (JavaThread*)thr; + jt->dirty_card_queue().enqueue(byte); + } else { + MutexLockerEx x(Shared_DirtyCardQ_lock, + Mutex::_no_safepoint_check_flag); + _dcqs.shared_dirty_card_queue()->enqueue(byte); + } + } +} + +void +G1SATBCardTableLoggingModRefBS::write_ref_field_static(void* field, + oop new_val) { + uintptr_t field_uint = (uintptr_t)field; + uintptr_t new_val_uint = (uintptr_t)new_val; + uintptr_t comb = field_uint ^ new_val_uint; + comb = comb >> HeapRegion::LogOfHRGrainBytes; + if (comb == 0) return; + if (new_val == NULL) return; + // Otherwise, log it. 
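// [Illustrative sketch, not part of this changeset.] The filter a few lines
// above ("comb = field_uint ^ new_val_uint; comb >> LogOfHRGrainBytes")
// relies on heap regions being power-of-two sized and aligned: two addresses
// lie in the same region exactly when they agree in every bit above the
// region-size bits, i.e. when the shifted xor is zero. Stand-alone model in
// plain C++; the 1 MB region size and all names are assumptions made only
// for this example.
#include <cstdint>
#include <cstdio>

static const unsigned kLogRegionBytes = 20;  // assume 1 MB regions

static bool same_region(uintptr_t field_addr, uintptr_t new_val_addr) {
  return ((field_addr ^ new_val_addr) >> kLogRegionBytes) == 0;
}

int main() {
  uintptr_t base = (uintptr_t)7 << kLogRegionBytes;  // some region-aligned address
  // Same region: no rem set update would be needed.
  std::printf("%d\n", (int)same_region(base + 0x10, base + 0x8000));
  // Next region over: the store crosses regions and must be logged.
  std::printf("%d\n", (int)same_region(base + 0x10,
                                       base + ((uintptr_t)1 << kLogRegionBytes)));
  return 0;
}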
+ G1SATBCardTableLoggingModRefBS* g1_bs = + (G1SATBCardTableLoggingModRefBS*)Universe::heap()->barrier_set(); + g1_bs->write_ref_field_work(field, new_val); +} + +void +G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) { + jbyte* byte = byte_for(mr.start()); + jbyte* last_byte = byte_for(mr.last()); + Thread* thr = Thread::current(); + if (whole_heap) { + while (byte <= last_byte) { + *byte = dirty_card; + byte++; + } + } else { + // Enqueue if necessary. + if (thr->is_Java_thread()) { + JavaThread* jt = (JavaThread*)thr; + while (byte <= last_byte) { + if (*byte != dirty_card) { + *byte = dirty_card; + jt->dirty_card_queue().enqueue(byte); + } + byte++; + } + } else { + MutexLockerEx x(Shared_DirtyCardQ_lock, + Mutex::_no_safepoint_check_flag); + while (byte <= last_byte) { + if (*byte != dirty_card) { + *byte = dirty_card; + _dcqs.shared_dirty_card_queue()->enqueue(byte); + } + byte++; + } + } + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,107 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#ifndef SERIALGC + +class DirtyCardQueueSet; + +// This barrier is specialized to use a logging barrier to support +// snapshot-at-the-beginning marking. + +class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS { +private: + // Add "pre_val" to a set of objects that may have been disconnected from the + // pre-marking object graph. + static void enqueue(oop pre_val); + +public: + G1SATBCardTableModRefBS(MemRegion whole_heap, + int max_covered_regions); + + bool is_a(BarrierSet::Name bsn) { + return bsn == BarrierSet::G1SATBCT || CardTableModRefBS::is_a(bsn); + } + + virtual bool has_write_ref_pre_barrier() { return true; } + + // This notes that we don't need to access any BarrierSet data + // structures, so this can be called from a static context. + static void write_ref_field_pre_static(void* field, oop newVal) { + assert(!UseCompressedOops, "Else needs to be templatized"); + oop preVal = *((oop*)field); + if (preVal != NULL) { + enqueue(preVal); + } + } + + // When we know the current java thread: + static void write_ref_field_pre_static(void* field, oop newVal, + JavaThread* jt); + + // We export this to make it available in cases where the static + // type of the barrier set is known. Note that it is non-virtual. 
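// [Illustrative sketch, not part of this changeset.] The pre-barrier
// declared just above records the *previous* value of a reference field
// before it is overwritten. That is what lets concurrent marking behave as
// if it ran against the snapshot taken at marking start (SATB): an object
// whose last incoming reference is overwritten during marking is still
// enqueued and eventually visited. Toy single-threaded model in plain
// standard C++ with hypothetical names; it is not the HotSpot queue
// machinery.
#include <cstdio>
#include <vector>

struct ToyObj { const char* name; };

static bool marking_active = true;
static std::vector<ToyObj*> satb_queue;  // stands in for the SATB mark queue

// Model of the reference store "*field = new_val" with an SATB pre-barrier.
static void write_ref_with_pre_barrier(ToyObj** field, ToyObj* new_val) {
  if (marking_active) {
    ToyObj* pre_val = *field;  // the value about to be overwritten
    if (pre_val != NULL) {
      satb_queue.push_back(pre_val);  // keep it visible to the marker
    }
  }
  *field = new_val;  // the actual store
}

int main() {
  ToyObj old_target = {"old"};
  ToyObj new_target = {"new"};
  ToyObj* field = &old_target;
  write_ref_with_pre_barrier(&field, &new_target);
  std::printf("queued %zu pre-value(s); first is '%s'\n",
              satb_queue.size(), satb_queue.empty() ? "-" : satb_queue[0]->name);
  return 0;
}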
+ inline void inline_write_ref_field_pre(void* field, oop newVal) { + write_ref_field_pre_static(field, newVal); + } + + // This is the more general virtual version. + void write_ref_field_pre_work(void* field, oop new_val) { + inline_write_ref_field_pre(field, new_val); + } + + virtual void write_ref_array_pre(MemRegion mr); + +}; + +// Adds card-table logging to the post-barrier. +// Usual invariant: all dirty cards are logged in the DirtyCardQueueSet. +class G1SATBCardTableLoggingModRefBS: public G1SATBCardTableModRefBS { + private: + DirtyCardQueueSet& _dcqs; + public: + G1SATBCardTableLoggingModRefBS(MemRegion whole_heap, + int max_covered_regions); + + bool is_a(BarrierSet::Name bsn) { + return bsn == BarrierSet::G1SATBCTLogging || + G1SATBCardTableModRefBS::is_a(bsn); + } + + void write_ref_field_work(void* field, oop new_val); + + // Can be called from static contexts. + static void write_ref_field_static(void* field, oop new_val); + + // NB: if you do a whole-heap invalidation, the "usual invariant" defined + // above no longer applies. + void invalidate(MemRegion mr, bool whole_heap = false); + + void write_region_work(MemRegion mr) { invalidate(mr); } + void write_ref_array_work(MemRegion mr) { invalidate(mr); } + + +}; + + +#endif // SERIALGC diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1_globals.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1_globals.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,31 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_g1_globals.cpp.incl" + +G1_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \ + MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \ + MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG, \ + MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1_globals.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,287 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// +// Defines all globals flags used by the garbage-first compiler. +// + +#define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, manageable, product_rw) \ + \ + product(intx, ParallelGCG1AllocBufferSize, 4*K, \ + "Size of parallel G1 allocation buffers in to-space.") \ + \ + product(intx, G1TimeSliceMS, 500, \ + "Time slice for MMU specification") \ + \ + product(intx, G1MaxPauseTimeMS, 200, \ + "Max GC time per MMU time slice") \ + \ + product(intx, G1ConfidencePerc, 50, \ + "Confidence level for MMU/pause predictions") \ + \ + product(intx, G1MarkingOverheadPerc, 0, \ + "Overhead of concurrent marking") \ + \ + product(bool, G1AccountConcurrentOverhead, false, \ + "Whether soft real-time compliance in G1 will take into account" \ + "concurrent overhead") \ + \ + product(intx, G1YoungGenSize, 0, \ + "Size of the G1 young generation, 0 is the adaptive policy") \ + \ + product(bool, G1Gen, true, \ + "If true, it will enable the generational G1") \ + \ + develop(intx, G1GCPct, 10, \ + "The desired percent time spent on GC") \ + \ + product(intx, G1PolicyVerbose, 0, \ + "The verbosity level on G1 policy decisions") \ + \ + develop(bool, G1UseHRIntoRS, true, \ + "Determines whether the 'advanced' HR Into rem set is used.") \ + \ + product(bool, G1VerifyRemSet, false, \ + "If true, verify the rem set functioning at each GC") \ + \ + product(bool, G1VerifyConcMark, false, \ + "If true, verify the conc marking code at full GC time") \ + \ + develop(intx, G1MarkingVerboseLevel, 0, \ + "Level (0-4) of verboseness of the marking code") \ + \ + develop(bool, G1VerifyConcMarkPrintReachable, true, \ + "If conc mark verification fails, print reachable objects") \ + \ + develop(bool, G1TraceMarkStackOverflow, false, \ + "If true, extra debugging code for CM restart for ovflw.") \ + \ + product(bool, G1VerifyMarkingInEvac, false, \ + "If true, verify marking info during evacuation") \ + \ + develop(intx, G1PausesBtwnConcMark, -1, \ + "If positive, fixed number of pauses between conc markings") \ + \ + product(intx, G1EfficiencyPctCausesMark, 80, \ + "The cum gc efficiency since mark fall-off that causes " \ + "new marking") \ + \ + product(bool, TraceConcurrentMark, false, \ + "Trace concurrent mark") \ + \ + product(bool, SummarizeG1ConcMark, false, \ + "Summarize concurrent mark info") \ + \ + product(bool, SummarizeG1RSStats, false, \ + "Summarize remembered set processing info") \ + \ + product(bool, SummarizeG1ZFStats, false, \ + "Summarize zero-filling info") \ + \ + product(bool, TraceG1Refine, false, \ + "Trace G1 concurrent refinement") \ + \ + develop(bool, G1ConcMark, true, \ + "If true, run concurrent marking for G1") \ + \ + product(intx, G1CMStackSize, 2 * 1024 * 1024, \ + "Size of the mark stack for concurrent marking.") \ + \ 
+ product(intx, G1CMRegionStackSize, 1024 * 1024, \ + "Size of the region stack for concurrent marking.") \ + \ + develop(bool, G1ConcRefine, true, \ + "If true, run concurrent rem set refinement for G1") \ + \ + develop(intx, G1ConcRefineTargTraversals, 4, \ + "Number of concurrent refinement we try to achieve") \ + \ + develop(intx, G1ConcRefineInitialDelta, 4, \ + "Number of heap regions of alloc ahead of starting collection " \ + "pause to start concurrent refinement (initially)") \ + \ + product(bool, G1SmoothConcRefine, true, \ + "Attempts to smooth out the overhead of concurrent refinement") \ + \ + develop(bool, G1ConcZeroFill, true, \ + "If true, run concurrent zero-filling thread") \ + \ + develop(intx, G1ConcZFMaxRegions, 1, \ + "Stop zero-filling when # of zf'd regions reaches") \ + \ + product(intx, G1SteadyStateUsed, 90, \ + "If non-0, try to maintain 'used' at this pct (of max)") \ + \ + product(intx, G1SteadyStateUsedDelta, 30, \ + "If G1SteadyStateUsed is non-0, then do pause this number of " \ + "of percentage points earlier if no marking is in progress.") \ + \ + develop(bool, G1SATBBarrierPrintNullPreVals, false, \ + "If true, count frac of ptr writes with null pre-vals.") \ + \ + product(intx, G1SATBLogBufferSize, 1*K, \ + "Number of entries in an SATB log buffer.") \ + \ + product(intx, G1SATBProcessCompletedThreshold, 20, \ + "Number of completed buffers that triggers log processing.") \ + \ + develop(intx, G1ExtraRegionSurvRate, 33, \ + "If the young survival rate is S, and there's room left in " \ + "to-space, we will allow regions whose survival rate is up to " \ + "S + (1 - S)*X, where X is this parameter (as a fraction.)") \ + \ + develop(intx, G1InitYoungSurvRatio, 50, \ + "Expected Survival Rate for newly allocated bytes") \ + \ + develop(bool, G1SATBPrintStubs, false, \ + "If true, print generated stubs for the SATB barrier") \ + \ + product(intx, G1ExpandByPctOfAvail, 20, \ + "When expanding, % of uncommitted space to claim.") \ + \ + develop(bool, G1RSBarrierRegionFilter, true, \ + "If true, generate region filtering code in RS barrier") \ + \ + develop(bool, G1RSBarrierNullFilter, true, \ + "If true, generate null-pointer filtering code in RS barrier") \ + \ + develop(bool, G1PrintCTFilterStats, false, \ + "If true, print stats on RS filtering effectiveness") \ + \ + develop(bool, G1RSBarrierUseQueue, true, \ + "If true, use queueing RS barrier") \ + \ + develop(bool, G1RSLogCheckCardTable, false, \ + "If true, verify that no dirty cards remain after RS log " \ + "processing.") \ + \ + product(intx, G1MinPausesBetweenMarks, 2, \ + "Number of inefficient pauses necessary to trigger marking.") \ + \ + product(intx, G1InefficientPausePct, 80, \ + "Threshold of an 'inefficient' pauses (as % of cum efficiency.") \ + \ + product(intx, G1RSPopLimit, 32768, \ + "Limit that defines popularity. Should go away! 
XXX") \ + \ + develop(bool, G1RSCountHisto, false, \ + "If true, print a histogram of RS occupancies after each pause") \ + \ + product(intx, G1ObjPopLimit, 256, \ + "Limit that defines popularity for an object.") \ + \ + product(bool, G1TraceFileOverwrite, false, \ + "Allow the trace file to be overwritten") \ + \ + develop(intx, G1PrintRegionLivenessInfo, 0, \ + "When > 0, print the occupancies of the best and worst" \ + "regions.") \ + \ + develop(bool, G1TracePopularity, false, \ + "When true, provide detailed tracing of popularity.") \ + \ + product(bool, G1SummarizePopularity, false, \ + "When true, provide end-of-run-summarization of popularity.") \ + \ + product(intx, G1NumPopularRegions, 1, \ + "Number of regions reserved to hold popular objects. " \ + "Should go away later.") \ + \ + develop(bool, G1PrintParCleanupStats, false, \ + "When true, print extra stats about parallel cleanup.") \ + \ + product(bool, G1DoAgeCohortChecks, false, \ + "When true, check well-formedness of age cohort structures.") \ + \ + develop(bool, G1DisablePreBarrier, false, \ + "Disable generation of pre-barrier (i.e., marking barrier) ") \ + \ + develop(bool, G1DisablePostBarrier, false, \ + "Disable generation of post-barrier (i.e., RS barrier) ") \ + \ + product(intx, G1DirtyCardQueueMax, 30, \ + "Maximum number of completed RS buffers before mutator threads " \ + "start processing them.") \ + \ + develop(intx, G1ConcRSLogCacheSize, 10, \ + "Log base 2 of the length of conc RS hot-card cache.") \ + \ + product(bool, G1ConcRSCountTraversals, false, \ + "If true, gather data about the number of times CR traverses " \ + "cards ") \ + \ + product(intx, G1ConcRSHotCardLimit, 4, \ + "The threshold that defines (>=) a hot card.") \ + \ + develop(bool, G1PrintOopAppls, false, \ + "When true, print applications of closures to external locs.") \ + \ + product(intx, G1LogRSRegionEntries, 7, \ + "Log_2 of max number of regions for which we keep bitmaps.") \ + \ + develop(bool, G1RecordHRRSOops, false, \ + "When true, record recent calls to rem set operations.") \ + \ + develop(bool, G1RecordHRRSEvents, false, \ + "When true, record recent calls to rem set operations.") \ + \ + develop(intx, G1MaxVerifyFailures, -1, \ + "The maximum number of verification failrues to print. 
" \ + "-1 means print all.") \ + \ + develop(bool, G1ScrubRemSets, true, \ + "When true, do RS scrubbing after cleanup.") \ + \ + develop(bool, G1RSScrubVerbose, false, \ + "When true, do RS scrubbing with verbose output.") \ + \ + develop(bool, G1YoungSurvRateVerbose, false, \ + "print out the survival rate of young regions according to age.") \ + \ + develop(intx, G1YoungSurvRateNumRegionsSummary, 0, \ + "the number of regions for which we'll print a surv rate " \ + "summary.") \ + \ + product(bool, G1UseScanOnlyPrefix, false, \ + "It determines whether the system will calculate an optimum " \ + "scan-only set.") \ + \ + product(intx, G1MinReservePerc, 10, \ + "It determines the minimum reserve we should have in the heap " \ + "to minimize the probability of promotion failure.") \ + \ + product(bool, G1TraceRegions, false, \ + "If set G1 will print information on which regions are being " \ + "allocated and which are reclaimed.") \ + \ + develop(bool, G1HRRSUseSparseTable, true, \ + "When true, use sparse table to save space.") \ + \ + develop(bool, G1HRRSFlushLogBuffersOnVerify, false, \ + "Forces flushing of log buffers before verification.") \ + \ + product(intx, G1MaxSurvivorRegions, 0, \ + "The maximum number of survivor regions") + +G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,64 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// The following OopClosure types get specialized versions of +// "oop_oop_iterate" that invoke the closures' do_oop methods +// non-virtually, using a mechanism defined in this file. Extend these +// macros in the obvious way to add specializations for new closures. + +// Forward declarations. 
+enum G1Barrier { + G1BarrierNone, G1BarrierRS, G1BarrierEvac +}; + +template +class G1ParCopyClosure; +class G1ParScanClosure; + +typedef G1ParCopyClosure G1ParScanHeapEvacClosure; + +class FilterIntoCSClosure; +class FilterOutOfRegionClosure; +class FilterInHeapRegionAndIntoCSClosure; +class FilterAndMarkInHeapRegionAndIntoCSClosure; +class G1ScanAndBalanceClosure; + +#ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES +#error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined." +#endif + +#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ + f(G1ParScanHeapEvacClosure,_nv) \ + f(G1ParScanClosure,_nv) \ + f(FilterIntoCSClosure,_nv) \ + f(FilterOutOfRegionClosure,_nv) \ + f(FilterInHeapRegionAndIntoCSClosure,_nv) \ + f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv) \ + f(G1ScanAndBalanceClosure,_nv) + +#ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES +#error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined." +#endif + +#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegion.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,874 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegion.cpp.incl" + +HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1, + HeapRegion* hr, OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + FilterKind fk) : + ContiguousSpaceDCTOC(hr, cl, precision, NULL), + _hr(hr), _fk(fk), _g1(g1) +{} + +FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r, + OopClosure* oc) : + _r_bottom(r->bottom()), _r_end(r->end()), + _oc(oc), _out_of_region(0) +{} + +class VerifyLiveClosure: public OopClosure { + G1CollectedHeap* _g1h; + CardTableModRefBS* _bs; + oop _containing_obj; + bool _failures; + int _n_failures; +public: + VerifyLiveClosure(G1CollectedHeap* g1h) : + _g1h(g1h), _bs(NULL), _containing_obj(NULL), + _failures(false), _n_failures(0) + { + BarrierSet* bs = _g1h->barrier_set(); + if (bs->is_a(BarrierSet::CardTableModRef)) + _bs = (CardTableModRefBS*)bs; + } + + void set_containing_obj(oop obj) { + _containing_obj = obj; + } + + bool failures() { return _failures; } + int n_failures() { return _n_failures; } + + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + + void do_oop(oop* p) { + assert(_containing_obj != NULL, "Precondition"); + assert(!_g1h->is_obj_dead(_containing_obj), "Precondition"); + oop obj = *p; + if (obj != NULL) { + bool failed = false; + if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) { + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + if (!_g1h->is_in_closed_subset(obj)) { + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj "PTR_FORMAT + " points to obj "PTR_FORMAT + " not in the heap.", + p, (void*) _containing_obj, (void*) obj); + } else { + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of live obj "PTR_FORMAT + " points to dead obj "PTR_FORMAT".", + p, (void*) _containing_obj, (void*) obj); + } + gclog_or_tty->print_cr("Live obj:"); + _containing_obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("Bad referent:"); + obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("----------"); + _failures = true; + failed = true; + _n_failures++; + } + + if (!_g1h->full_collection()) { + HeapRegion* from = _g1h->heap_region_containing(p); + HeapRegion* to = _g1h->heap_region_containing(*p); + if (from != NULL && to != NULL && + from != to && + !to->popular() && + !to->isHumongous()) { + jbyte cv_obj = *_bs->byte_for_const(_containing_obj); + jbyte cv_field = *_bs->byte_for_const(p); + const jbyte dirty = CardTableModRefBS::dirty_card_val(); + + bool is_bad = !(from->is_young() + || to->rem_set()->contains_reference(p) + || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed + (_containing_obj->is_objArray() ? 
+ cv_field == dirty + : cv_obj == dirty || cv_field == dirty)); + if (is_bad) { + if (!_failures) { + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("----------"); + } + gclog_or_tty->print_cr("Missing rem set entry:"); + gclog_or_tty->print_cr("Field "PTR_FORMAT + " of obj "PTR_FORMAT + ", in region %d ["PTR_FORMAT + ", "PTR_FORMAT"),", + p, (void*) _containing_obj, + from->hrs_index(), + from->bottom(), + from->end()); + _containing_obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("points to obj "PTR_FORMAT + " in region %d ["PTR_FORMAT + ", "PTR_FORMAT").", + (void*) obj, to->hrs_index(), + to->bottom(), to->end()); + obj->print_on(gclog_or_tty); + gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.", + cv_obj, cv_field); + gclog_or_tty->print_cr("----------"); + _failures = true; + if (!failed) _n_failures++; + } + } + } + } + } +}; + +template +HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h, + HeapRegion* hr, + HeapWord* cur, HeapWord* top) { + oop cur_oop = oop(cur); + int oop_size = cur_oop->size(); + HeapWord* next_obj = cur + oop_size; + while (next_obj < top) { + // Keep filtering the remembered set. + if (!g1h->is_obj_dead(cur_oop, hr)) { + // Bottom lies entirely below top, so we can call the + // non-memRegion version of oop_iterate below. +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + cur_oop->oop_iterate(&vl_cl); + } +#endif + cur_oop->oop_iterate(cl); + } + cur = next_obj; + cur_oop = oop(cur); + oop_size = cur_oop->size(); + next_obj = cur + oop_size; + } + return cur; +} + +void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr, + HeapWord* bottom, + HeapWord* top, + OopClosure* cl) { + G1CollectedHeap* g1h = _g1; + + int oop_size; + + OopClosure* cl2 = cl; + FilterIntoCSClosure intoCSFilt(this, g1h, cl); + FilterOutOfRegionClosure outOfRegionFilt(_hr, cl); + switch (_fk) { + case IntoCSFilterKind: cl2 = &intoCSFilt; break; + case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break; + } + + // Start filtering what we add to the remembered set. If the object is + // not considered dead, either because it is marked (in the mark bitmap) + // or it was allocated after marking finished, then we add it. Otherwise + // we can safely ignore the object. + if (!g1h->is_obj_dead(oop(bottom), _hr)) { +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + oop(bottom)->oop_iterate(&vl_cl, mr); + } +#endif + oop_size = oop(bottom)->oop_iterate(cl2, mr); + } else { + oop_size = oop(bottom)->size(); + } + + bottom += oop_size; + + if (bottom < top) { + // We replicate the loop below for several kinds of possible filters. + switch (_fk) { + case NoFilterKind: + bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top); + break; + case IntoCSFilterKind: { + FilterIntoCSClosure filt(this, g1h, cl); + bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); + break; + } + case OutOfRegionFilterKind: { + FilterOutOfRegionClosure filt(_hr, cl); + bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); + break; + } + default: + ShouldNotReachHere(); + } + + // Last object. Need to do dead-obj filtering here too. 
+ if (!g1h->is_obj_dead(oop(bottom), _hr)) { +#ifndef PRODUCT + if (G1VerifyMarkingInEvac) { + VerifyLiveClosure vl_cl(g1h); + oop(bottom)->oop_iterate(&vl_cl, mr); + } +#endif + oop(bottom)->oop_iterate(cl2, mr); + } + } +} + +void HeapRegion::reset_after_compaction() { + G1OffsetTableContigSpace::reset_after_compaction(); + // After a compaction the mark bitmap is invalid, so we must + // treat all objects as being inside the unmarked area. + zero_marked_bytes(); + init_top_at_mark_start(); +} + + + +DirtyCardToOopClosure* +HeapRegion::new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapRegionDCTOC::FilterKind fk) { + return new HeapRegionDCTOC(G1CollectedHeap::heap(), + this, cl, precision, fk); +} + +void HeapRegion::hr_clear(bool par, bool clear_space) { + _humongous = false; + _humongous_start = false; + _humongous_start_region = NULL; + _in_collection_set = false; + _is_gc_alloc_region = false; + + // Age stuff (if parallel, this will be done separately, since it needs + // to be sequential). + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + set_young_index_in_cset(-1); + uninstall_surv_rate_group(); + set_young_type(NotYoung); + + // In case it had been the start of a humongous sequence, reset its end. + set_end(_orig_end); + + if (!par) { + // If this is parallel, this will be done later. + HeapRegionRemSet* hrrs = rem_set(); + if (hrrs != NULL) hrrs->clear(); + _claimed = 0; + } + zero_marked_bytes(); + set_sort_index(-1); + if ((uintptr_t)bottom() >= (uintptr_t)g1h->popular_object_boundary()) + set_popular(false); + + _offsets.resize(HeapRegion::GrainWords); + init_top_at_mark_start(); + if (clear_space) clear(); +} + +// +void HeapRegion::calc_gc_efficiency() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _gc_efficiency = (double) garbage_bytes() / + g1h->predict_region_elapsed_time_ms(this, false); +} +// + +void HeapRegion::set_startsHumongous() { + _humongous_start = true; _humongous = true; + _humongous_start_region = this; + assert(end() == _orig_end, "Should be normal before alloc."); +} + +bool HeapRegion::claimHeapRegion(jint claimValue) { + jint current = _claimed; + if (current != claimValue) { + jint res = Atomic::cmpxchg(claimValue, &_claimed, current); + if (res == current) { + return true; + } + } + return false; +} + +HeapWord* HeapRegion::next_block_start_careful(HeapWord* addr) { + HeapWord* low = addr; + HeapWord* high = end(); + while (low < high) { + size_t diff = pointer_delta(high, low); + // Must add one below to bias toward the high amount. Otherwise, if + // "high" were at the desired value, and "low" were one less, we + // would not converge on "high". This is not symmetric, because + // we set "high" to a block start, which might be the right one, + // which we don't do for "low". 
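// A minimal, standalone sketch of why the midpoint above is rounded up
// (middle = low + (diff+1)/2). This is a hypothetical example, not the HotSpot
// code: the same convergence argument appears in the classic "find the last
// index whose value is <= target" search, where one bound stays a live
// candidate and must not be re-tested forever once the interval is two wide.

#include <cassert>

// Returns the largest index i with a[i] <= target; requires a[0] <= target.
static int last_at_or_below(const int* a, int n, int target) {
  int low = 0, high = n - 1;
  while (low < high) {
    // Round the midpoint up: "low" always remains a valid candidate, "high"
    // may not, so with a rounded-down midpoint the case high == low + 1
    // would re-test "low" forever and never converge.
    int middle = low + (high - low + 1) / 2;
    if (a[middle] <= target) {
      low = middle;        // middle is a valid candidate; move low up to it
    } else {
      high = middle - 1;   // middle is too large; discard it
    }
  }
  return low;
}

int main() {
  int a[] = { 2, 4, 4, 8, 16 };
  assert(last_at_or_below(a, 5, 5)  == 2);
  assert(last_at_or_below(a, 5, 2)  == 0);
  assert(last_at_or_below(a, 5, 99) == 4);
  return 0;
}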
+ HeapWord* middle = low + (diff+1)/2; + if (middle == high) return high; + HeapWord* mid_bs = block_start_careful(middle); + if (mid_bs < addr) { + low = middle; + } else { + high = mid_bs; + } + } + assert(low == high && low >= addr, "Didn't work."); + return low; +} + +void HeapRegion::set_next_on_unclean_list(HeapRegion* r) { + assert(r == NULL || r->is_on_unclean_list(), "Malformed unclean list."); + _next_in_special_set = r; +} + +void HeapRegion::set_on_unclean_list(bool b) { + _is_on_unclean_list = b; +} + +void HeapRegion::initialize(MemRegion mr, bool clear_space) { + G1OffsetTableContigSpace::initialize(mr, false); + hr_clear(false/*par*/, clear_space); +} +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + +HeapRegion:: +HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed) + : G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed), + _next_fk(HeapRegionDCTOC::NoFilterKind), + _hrs_index(-1), + _humongous(false), _humongous_start(false), _humongous_start_region(NULL), + _in_collection_set(false), _is_gc_alloc_region(false), + _is_on_free_list(false), _is_on_unclean_list(false), + _next_in_special_set(NULL), _orig_end(NULL), + _claimed(0), _evacuation_failed(false), + _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1), + _popularity(NotPopular), + _young_type(NotYoung), _next_young_region(NULL), + _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1), + _rem_set(NULL), _zfs(NotZeroFilled) +{ + _orig_end = mr.end(); + // Note that initialize() will set the start of the unmarked area of the + // region. + this->initialize(mr, !is_zeroed); + + _rem_set = new HeapRegionRemSet(sharedOffsetArray, this); + + assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant."); + // In case the region is allocated during a pause, note the top. + // We haven't done any counting on a brand new region. + _top_at_conc_mark_count = bottom(); +} + +class NextCompactionHeapRegionClosure: public HeapRegionClosure { + const HeapRegion* _target; + bool _target_seen; + HeapRegion* _last; + CompactibleSpace* _res; +public: + NextCompactionHeapRegionClosure(const HeapRegion* target) : + _target(target), _target_seen(false), _res(NULL) {} + bool doHeapRegion(HeapRegion* cur) { + if (_target_seen) { + if (!cur->isHumongous()) { + _res = cur; + return true; + } + } else if (cur == _target) { + _target_seen = true; + } + return false; + } + CompactibleSpace* result() { return _res; } +}; + +CompactibleSpace* HeapRegion::next_compaction_space() const { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // cast away const-ness + HeapRegion* r = (HeapRegion*) this; + NextCompactionHeapRegionClosure blk(r); + g1h->heap_region_iterate_from(r, &blk); + return blk.result(); +} + +void HeapRegion::set_continuesHumongous(HeapRegion* start) { + // The order is important here. + start->add_continuingHumongousRegion(this); + _humongous = true; _humongous_start = false; + _humongous_start_region = start; +} + +void HeapRegion::add_continuingHumongousRegion(HeapRegion* cont) { + // Must join the blocks of the current H region seq with the block of the + // added region. 
+ offsets()->join_blocks(bottom(), cont->bottom()); + arrayOop obj = (arrayOop)(bottom()); + obj->set_length((int) (obj->length() + cont->capacity()/jintSize)); + set_end(cont->end()); + set_top(cont->end()); +} + +void HeapRegion::save_marks() { + set_saved_mark(); +} + +void HeapRegion::oops_in_mr_iterate(MemRegion mr, OopClosure* cl) { + HeapWord* p = mr.start(); + HeapWord* e = mr.end(); + oop obj; + while (p < e) { + obj = oop(p); + p += obj->oop_iterate(cl); + } + assert(p == e, "bad memregion: doesn't end on obj boundary"); +} + +#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \ +void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \ + ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl); \ +} +SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN) + + +void HeapRegion::oop_before_save_marks_iterate(OopClosure* cl) { + oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl); +} + +#ifdef DEBUG +HeapWord* HeapRegion::allocate(size_t size) { + jint state = zero_fill_state(); + assert(!G1CollectedHeap::heap()->allocs_are_zero_filled() || + zero_fill_is_allocated(), + "When ZF is on, only alloc in ZF'd regions"); + return G1OffsetTableContigSpace::allocate(size); +} +#endif + +void HeapRegion::set_zero_fill_state_work(ZeroFillState zfs) { + assert(top() == bottom() || zfs == Allocated, + "Region must be empty, or we must be setting it to allocated."); + assert(ZF_mon->owned_by_self() || + Universe::heap()->is_gc_active(), + "Must hold the lock or be a full GC to modify."); + _zfs = zfs; +} + +void HeapRegion::set_zero_fill_complete() { + set_zero_fill_state_work(ZeroFilled); + if (ZF_mon->owned_by_self()) { + ZF_mon->notify_all(); + } +} + + +void HeapRegion::ensure_zero_filled() { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + ensure_zero_filled_locked(); +} + +void HeapRegion::ensure_zero_filled_locked() { + assert(ZF_mon->owned_by_self(), "Precondition"); + bool should_ignore_zf = SafepointSynchronize::is_at_safepoint(); + assert(should_ignore_zf || Heap_lock->is_locked(), + "Either we're in a GC or we're allocating a region."); + switch (zero_fill_state()) { + case HeapRegion::NotZeroFilled: + set_zero_fill_in_progress(Thread::current()); + { + ZF_mon->unlock(); + Copy::fill_to_words(bottom(), capacity()/HeapWordSize); + ZF_mon->lock_without_safepoint_check(); + } + // A trap. + guarantee(zero_fill_state() == HeapRegion::ZeroFilling + && zero_filler() == Thread::current(), + "AHA! Tell Dave D if you see this..."); + set_zero_fill_complete(); + // gclog_or_tty->print_cr("Did sync ZF."); + ConcurrentZFThread::note_sync_zfs(); + break; + case HeapRegion::ZeroFilling: + if (should_ignore_zf) { + // We can "break" the lock and take over the work. + Copy::fill_to_words(bottom(), capacity()/HeapWordSize); + set_zero_fill_complete(); + ConcurrentZFThread::note_sync_zfs(); + break; + } else { + ConcurrentZFThread::wait_for_ZF_completed(this); + } + case HeapRegion::ZeroFilled: + // Nothing to do. + break; + case HeapRegion::Allocated: + guarantee(false, "Should not call on allocated regions."); + } + assert(zero_fill_state() == HeapRegion::ZeroFilled, "Post"); +} + +HeapWord* +HeapRegion::object_iterate_mem_careful(MemRegion mr, + ObjectClosure* cl) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + // We used to use "block_start_careful" here. But we're actually happy + // to update the BOT while we do this... 
+ HeapWord* cur = block_start(mr.start()); + mr = mr.intersection(used_region()); + if (mr.is_empty()) return NULL; + // Otherwise, find the obj that extends onto mr.start(). + + assert(cur <= mr.start() + && (oop(cur)->klass() == NULL || + cur + oop(cur)->size() > mr.start()), + "postcondition of block_start"); + oop obj; + while (cur < mr.end()) { + obj = oop(cur); + if (obj->klass() == NULL) { + // Ran into an unparseable point. + return cur; + } else if (!g1h->is_obj_dead(obj)) { + cl->do_object(obj); + } + if (cl->abort()) return cur; + // The check above must occur before the operation below, since an + // abort might invalidate the "size" operation. + cur += obj->size(); + } + return NULL; +} + +HeapWord* +HeapRegion:: +oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl) { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // If we're within a stop-world GC, then we might look at a card in a + // GC alloc region that extends onto a GC LAB, which may not be + // parseable. Stop such at the "saved_mark" of the region. + if (G1CollectedHeap::heap()->is_gc_active()) { + mr = mr.intersection(used_region_at_save_marks()); + } else { + mr = mr.intersection(used_region()); + } + if (mr.is_empty()) return NULL; + // Otherwise, find the obj that extends onto mr.start(). + + // We used to use "block_start_careful" here. But we're actually happy + // to update the BOT while we do this... + HeapWord* cur = block_start(mr.start()); + assert(cur <= mr.start(), "Postcondition"); + + while (cur <= mr.start()) { + if (oop(cur)->klass() == NULL) { + // Ran into an unparseable point. + return cur; + } + // Otherwise... + int sz = oop(cur)->size(); + if (cur + sz > mr.start()) break; + // Otherwise, go on. + cur = cur + sz; + } + oop obj; + obj = oop(cur); + // If we finish this loop... + assert(cur <= mr.start() + && obj->klass() != NULL + && cur + obj->size() > mr.start(), + "Loop postcondition"); + if (!g1h->is_obj_dead(obj)) { + obj->oop_iterate(cl, mr); + } + + HeapWord* next; + while (cur < mr.end()) { + obj = oop(cur); + if (obj->klass() == NULL) { + // Ran into an unparseable point. + return cur; + }; + // Otherwise: + next = (cur + obj->size()); + if (!g1h->is_obj_dead(obj)) { + if (next < mr.end()) { + obj->oop_iterate(cl); + } else { + // this obj spans the boundary. If it's an array, stop at the + // boundary. + if (obj->is_objArray()) { + obj->oop_iterate(cl, mr); + } else { + obj->oop_iterate(cl); + } + } + } + cur = next; + } + return NULL; +} + +void HeapRegion::print() const { print_on(gclog_or_tty); } +void HeapRegion::print_on(outputStream* st) const { + if (isHumongous()) { + if (startsHumongous()) + st->print(" HS"); + else + st->print(" HC"); + } else { + st->print(" "); + } + if (in_collection_set()) + st->print(" CS"); + else if (is_gc_alloc_region()) + st->print(" A "); + else + st->print(" "); + if (is_young()) + st->print(is_scan_only() ? " SO" : (is_survivor() ? " SU" : " Y ")); + else + st->print(" "); + if (is_empty()) + st->print(" F"); + else + st->print(" "); + st->print(" %d", _gc_time_stamp); + G1OffsetTableContigSpace::print_on(st); +} + +#define OBJ_SAMPLE_INTERVAL 0 +#define BLOCK_SAMPLE_INTERVAL 100 + +// This really ought to be commoned up into OffsetTableContigSpace somehow. +// We would need a mechanism to make that code skip dead objects. 
+ +void HeapRegion::verify(bool allow_dirty) const { + G1CollectedHeap* g1 = G1CollectedHeap::heap(); + HeapWord* p = bottom(); + HeapWord* prev_p = NULL; + int objs = 0; + int blocks = 0; + VerifyLiveClosure vl_cl(g1); + while (p < top()) { + size_t size = oop(p)->size(); + if (blocks == BLOCK_SAMPLE_INTERVAL) { + guarantee(p == block_start_const(p + (size/2)), + "check offset computation"); + blocks = 0; + } else { + blocks++; + } + if (objs == OBJ_SAMPLE_INTERVAL) { + oop obj = oop(p); + if (!g1->is_obj_dead(obj, this)) { + obj->verify(); + vl_cl.set_containing_obj(obj); + obj->oop_iterate(&vl_cl); + if (G1MaxVerifyFailures >= 0 + && vl_cl.n_failures() >= G1MaxVerifyFailures) break; + } + objs = 0; + } else { + objs++; + } + prev_p = p; + p += size; + } + HeapWord* rend = end(); + HeapWord* rtop = top(); + if (rtop < rend) { + guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop, + "check offset computation"); + } + if (vl_cl.failures()) { + gclog_or_tty->print_cr("Heap:"); + G1CollectedHeap::heap()->print(); + gclog_or_tty->print_cr(""); + } + if (G1VerifyConcMark && + G1VerifyConcMarkPrintReachable && + vl_cl.failures()) { + g1->concurrent_mark()->print_prev_bitmap_reachable(); + } + guarantee(!vl_cl.failures(), "should not have had any failures"); + guarantee(p == top(), "end of last object must match end of space"); +} + +// G1OffsetTableContigSpace code; copied from space.cpp. Hope this can go +// away eventually. + +void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space) { + // false ==> we'll do the clearing if there's clearing to be done. + ContiguousSpace::initialize(mr, false); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); + if (clear_space) clear(); +} + +void G1OffsetTableContigSpace::clear() { + ContiguousSpace::clear(); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); +} + +void G1OffsetTableContigSpace::set_bottom(HeapWord* new_bottom) { + Space::set_bottom(new_bottom); + _offsets.set_bottom(new_bottom); +} + +void G1OffsetTableContigSpace::set_end(HeapWord* new_end) { + Space::set_end(new_end); + _offsets.resize(new_end - bottom()); +} + +void G1OffsetTableContigSpace::print() const { + print_short(); + gclog_or_tty->print_cr(" [" INTPTR_FORMAT ", " INTPTR_FORMAT ", " + INTPTR_FORMAT ", " INTPTR_FORMAT ")", + bottom(), top(), _offsets.threshold(), end()); +} + +HeapWord* G1OffsetTableContigSpace::initialize_threshold() { + return _offsets.initialize_threshold(); +} + +HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start, + HeapWord* end) { + _offsets.alloc_block(start, end); + return _offsets.threshold(); +} + +HeapWord* G1OffsetTableContigSpace::saved_mark_word() const { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" ); + if (_gc_time_stamp < g1h->get_gc_time_stamp()) + return top(); + else + return ContiguousSpace::saved_mark_word(); +} + +void G1OffsetTableContigSpace::set_saved_mark() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp(); + + if (_gc_time_stamp < curr_gc_time_stamp) { + // The order of these is important, as another thread might be + // about to start scanning this region. If it does so after + // set_saved_mark and before _gc_time_stamp = ..., then the latter + // will be false, and it will pick up top() as the high water mark + // of region. 
If it does so after _gc_time_stamp = ..., then it + // will pick up the right saved_mark_word() as the high water mark + // of the region. Either way, the behaviour will be correct. + ContiguousSpace::set_saved_mark(); + OrderAccess::release_store_ptr((volatile intptr_t*) &_gc_time_stamp, + (intptr_t) curr_gc_time_stamp); + } +} + +G1OffsetTableContigSpace:: +G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed) : + _offsets(sharedOffsetArray, mr), + _par_alloc_lock(Mutex::leaf, "OffsetTableContigSpace par alloc lock", true), + _gc_time_stamp(0) +{ + _offsets.set_space(this); + initialize(mr, !is_zeroed); +} + +size_t RegionList::length() { + size_t len = 0; + HeapRegion* cur = hd(); + DEBUG_ONLY(HeapRegion* last = NULL); + while (cur != NULL) { + len++; + DEBUG_ONLY(last = cur); + cur = get_next(cur); + } + assert(last == tl(), "Invariant"); + return len; +} + +void RegionList::insert_before_head(HeapRegion* r) { + assert(well_formed(), "Inv"); + set_next(r, hd()); + _hd = r; + _sz++; + if (tl() == NULL) _tl = r; + assert(well_formed(), "Inv"); +} + +void RegionList::prepend_list(RegionList* new_list) { + assert(well_formed(), "Precondition"); + assert(new_list->well_formed(), "Precondition"); + HeapRegion* new_tl = new_list->tl(); + if (new_tl != NULL) { + set_next(new_tl, hd()); + _hd = new_list->hd(); + _sz += new_list->sz(); + if (tl() == NULL) _tl = new_list->tl(); + } else { + assert(new_list->hd() == NULL && new_list->sz() == 0, "Inv"); + } + assert(well_formed(), "Inv"); +} + +void RegionList::delete_after(HeapRegion* r) { + assert(well_formed(), "Precondition"); + HeapRegion* next = get_next(r); + assert(r != NULL, "Precondition"); + HeapRegion* next_tl = get_next(next); + set_next(r, next_tl); + dec_sz(); + if (next == tl()) { + assert(next_tl == NULL, "Inv"); + _tl = r; + } + assert(well_formed(), "Inv"); +} + +HeapRegion* RegionList::pop() { + assert(well_formed(), "Inv"); + HeapRegion* res = hd(); + if (res != NULL) { + _hd = get_next(res); + _sz--; + set_next(res, NULL); + if (sz() == 0) _tl = NULL; + } + assert(well_formed(), "Inv"); + return res; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegion.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,924 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#ifndef SERIALGC + +// A HeapRegion is the smallest piece of a G1CollectedHeap that +// can be collected independently. + +// NOTE: Although a HeapRegion is a Space, its +// Space::initDirtyCardClosure method must not be called. +// The problem is that the existence of this method breaks +// the independence of barrier sets from remembered sets. +// The solution is to remove this method from the definition +// of a Space. + +class CompactibleSpace; +class ContiguousSpace; +class HeapRegionRemSet; +class HeapRegionRemSetIterator; +class HeapRegion; + +// A dirty card to oop closure for heap regions. It +// knows how to get the G1 heap and how to use the bitmap +// in the concurrent marker used by G1 to filter remembered +// sets. + +class HeapRegionDCTOC : public ContiguousSpaceDCTOC { +public: + // Specification of possible DirtyCardToOopClosure filtering. + enum FilterKind { + NoFilterKind, + IntoCSFilterKind, + OutOfRegionFilterKind + }; + +protected: + HeapRegion* _hr; + FilterKind _fk; + G1CollectedHeap* _g1; + + void walk_mem_region_with_cl(MemRegion mr, + HeapWord* bottom, HeapWord* top, + OopClosure* cl); + + // We don't specialize this for FilteringClosure; filtering is handled by + // the "FilterKind" mechanism. But we provide this to avoid a compiler + // warning. + void walk_mem_region_with_cl(MemRegion mr, + HeapWord* bottom, HeapWord* top, + FilteringClosure* cl) { + HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top, + (OopClosure*)cl); + } + + // Get the actual top of the area on which the closure will + // operate, given where the top is assumed to be (the end of the + // memory region passed to do_MemRegion) and where the object + // at the top is assumed to start. For example, an object may + // start at the top but actually extend past the assumed top, + // in which case the top becomes the end of the object. + HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) { + return ContiguousSpaceDCTOC::get_actual_top(top, top_obj); + } + + // Walk the given memory region from bottom to (actual) top + // looking for objects and applying the oop closure (_cl) to + // them. The base implementation of this treats the area as + // blocks, where a block may or may not be an object. Sub- + // classes should override this to provide more accurate + // or possibly more efficient walking. + void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) { + Filtering_DCTOC::walk_mem_region(mr, bottom, top); + } + +public: + HeapRegionDCTOC(G1CollectedHeap* g1, + HeapRegion* hr, OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + FilterKind fk); +}; + + +// The complicating factor is that BlockOffsetTable diverged +// significantly, and we need functionality that is only in the G1 version. +// So I copied that code, which led to an alternate G1 version of +// OffsetTableContigSpace. If the two versions of BlockOffsetTable could +// be reconciled, then G1OffsetTableContigSpace could go away. + +// The idea behind time stamps is the following. Doing a save_marks on +// all regions at every GC pause is time consuming (if I remember +// well, 10ms or so). So, we would like to do that only for regions +// that are GC alloc regions. To achieve this, we use time +// stamps. For every evacuation pause, G1CollectedHeap generates a +// unique time stamp (essentially a counter that gets +// incremented). 
Every time we want to call save_marks on a region, +// we set the saved_mark_word to top and also copy the current GC +// time stamp to the time stamp field of the space. Reading the +// saved_mark_word involves checking the time stamp of the +// region. If it is the same as the current GC time stamp, then we +// can safely read the saved_mark_word field, as it is valid. If the +// time stamp of the region is not the same as the current GC time +// stamp, then we instead read top, as the saved_mark_word field is +// invalid. Time stamps (on the regions and also on the +// G1CollectedHeap) are reset at every cleanup (we iterate over +// the regions anyway) and at the end of a Full GC. The current scheme +// that uses sequential unsigned ints will fail only if we have 4b +// evacuation pauses between two cleanups, which is _highly_ unlikely. + +class G1OffsetTableContigSpace: public ContiguousSpace { + friend class VMStructs; + protected: + G1BlockOffsetArrayContigSpace _offsets; + Mutex _par_alloc_lock; + volatile unsigned _gc_time_stamp; + + public: + // Constructor. If "is_zeroed" is true, the MemRegion "mr" may be + // assumed to contain zeros. + G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed = false); + + void set_bottom(HeapWord* value); + void set_end(HeapWord* value); + + virtual HeapWord* saved_mark_word() const; + virtual void set_saved_mark(); + void reset_gc_time_stamp() { _gc_time_stamp = 0; } + + virtual void initialize(MemRegion mr, bool clear_space); + virtual void clear(); + + HeapWord* block_start(const void* p); + HeapWord* block_start_const(const void* p) const; + + // Add offset table update. + virtual HeapWord* allocate(size_t word_size); + HeapWord* par_allocate(size_t word_size); + + // MarkSweep support phase3 + virtual HeapWord* initialize_threshold(); + virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end); + + virtual void print() const; +}; + +class HeapRegion: public G1OffsetTableContigSpace { + friend class VMStructs; + private: + + // The next filter kind that should be used for a "new_dcto_cl" call with + // the "traditional" signature. + HeapRegionDCTOC::FilterKind _next_fk; + + // Requires that the region "mr" be dense with objects, and begin and end + // with an object. + void oops_in_mr_iterate(MemRegion mr, OopClosure* cl); + + // The remembered set for this region. + // (Might want to make this "inline" later, to avoid some alloc failure + // issues.) + HeapRegionRemSet* _rem_set; + + G1BlockOffsetArrayContigSpace* offsets() { return &_offsets; } + + protected: + // If this region is a member of a HeapRegionSeq, the index in that + // sequence, otherwise -1. + int _hrs_index; + + bool _humongous; // starts or continues a humongous object + bool _humongous_start; // starts a humongous object + // For a humongous region, region in which it starts. + HeapRegion* _humongous_start_region; + // For the start region of a humongous sequence, it's original end(). + HeapWord* _orig_end; + + // True iff the region is in current collection_set. + bool _in_collection_set; + + // True iff the region is on the unclean list, waiting to be zero filled. + bool _is_on_unclean_list; + + // True iff the region is on the free list, ready for allocation. + bool _is_on_free_list; + + // Is this or has it been an allocation region in the current collection + // pause. + bool _is_gc_alloc_region; + + // True iff an attempt to evacuate an object in the region failed. 
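// A minimal, standalone sketch of the time-stamp scheme described above, with
// hypothetical names (StampedRegion, g_gc_time_stamp) rather than the HotSpot
// classes: a region only trusts its saved mark if its stamp matches the
// current global GC time stamp; otherwise it falls back to top.

#include <cassert>

static unsigned g_gc_time_stamp = 0;          // bumped once per evacuation pause

struct StampedRegion {
  unsigned gc_time_stamp;
  char*    top;
  char*    saved_mark;

  void save_marks() {                         // done only for GC alloc regions
    saved_mark    = top;
    gc_time_stamp = g_gc_time_stamp;
  }
  char* saved_mark_word() const {
    // A stamp from an older pause means the saved mark is not valid for the
    // current pause, so reading top is the conservative, correct answer.
    return (gc_time_stamp == g_gc_time_stamp) ? saved_mark : top;
  }
};

int main() {
  char storage[64];
  StampedRegion r = { 0, storage + 8, storage };
  g_gc_time_stamp++;                          // a new pause begins
  assert(r.saved_mark_word() == r.top);       // stale stamp: fall back to top
  r.save_marks();
  r.top = storage + 32;                       // allocation during the pause
  assert(r.saved_mark_word() == storage + 8); // matching stamp: saved mark is valid
  return 0;
}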
+ bool _evacuation_failed; + + // A heap region may be a member one of a number of special subsets, each + // represented as linked lists through the field below. Currently, these + // sets include: + // The collection set. + // The set of allocation regions used in a collection pause. + // Spaces that may contain gray objects. + HeapRegion* _next_in_special_set; + + // next region in the young "generation" region set + HeapRegion* _next_young_region; + + // For parallel heapRegion traversal. + jint _claimed; + + // We use concurrent marking to determine the amount of live data + // in each heap region. + size_t _prev_marked_bytes; // Bytes known to be live via last completed marking. + size_t _next_marked_bytes; // Bytes known to be live via in-progress marking. + + // See "sort_index" method. -1 means is not in the array. + int _sort_index; + + // Means it has (or at least had) a very large RS, and should not be + // considered for membership in a collection set. + enum PopularityState { + NotPopular, + PopularPending, + Popular + }; + PopularityState _popularity; + + // + double _gc_efficiency; + // + + enum YoungType { + NotYoung, // a region is not young + ScanOnly, // a region is young and scan-only + Young, // a region is young + Survivor // a region is young and it contains + // survivor + }; + + YoungType _young_type; + int _young_index_in_cset; + SurvRateGroup* _surv_rate_group; + int _age_index; + + // The start of the unmarked area. The unmarked area extends from this + // word until the top and/or end of the region, and is the part + // of the region for which no marking was done, i.e. objects may + // have been allocated in this part since the last mark phase. + // "prev" is the top at the start of the last completed marking. + // "next" is the top at the start of the in-progress marking (if any.) + HeapWord* _prev_top_at_mark_start; + HeapWord* _next_top_at_mark_start; + // If a collection pause is in progress, this is the top at the start + // of that pause. + + // We've counted the marked bytes of objects below here. + HeapWord* _top_at_conc_mark_count; + + void init_top_at_mark_start() { + assert(_prev_marked_bytes == 0 && + _next_marked_bytes == 0, + "Must be called after zero_marked_bytes."); + HeapWord* bot = bottom(); + _prev_top_at_mark_start = bot; + _next_top_at_mark_start = bot; + _top_at_conc_mark_count = bot; + } + + jint _zfs; // A member of ZeroFillState. Protected by ZF_lock. + Thread* _zero_filler; // If _zfs is ZeroFilling, the thread that (last) + // made it so. + + void set_young_type(YoungType new_type) { + //assert(_young_type != new_type, "setting the same type" ); + // TODO: add more assertions here + _young_type = new_type; + } + + public: + // If "is_zeroed" is "true", the region "mr" can be assumed to contain zeros. + HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray, + MemRegion mr, bool is_zeroed); + + enum SomePublicConstants { + // HeapRegions are GrainBytes-aligned + // and have sizes that are multiples of GrainBytes. + LogOfHRGrainBytes = 20, + LogOfHRGrainWords = LogOfHRGrainBytes - LogHeapWordSize, + GrainBytes = 1 << LogOfHRGrainBytes, + GrainWords = 1 <= marked_bytes(), + "Can't mark more than we have."); + return used_at_mark_start_bytes - marked_bytes(); + } + + // An upper bound on the number of live bytes in the region. 
+ size_t max_live_bytes() { return used() - garbage_bytes(); } + + void add_to_marked_bytes(size_t incr_bytes) { + _next_marked_bytes = _next_marked_bytes + incr_bytes; + guarantee( _next_marked_bytes <= used(), "invariant" ); + } + + void zero_marked_bytes() { + _prev_marked_bytes = _next_marked_bytes = 0; + } + + bool isHumongous() const { return _humongous; } + bool startsHumongous() const { return _humongous_start; } + bool continuesHumongous() const { return _humongous && ! _humongous_start; } + // For a humongous region, region in which it starts. + HeapRegion* humongous_start_region() const { + return _humongous_start_region; + } + + // Causes the current region to represent a humongous object spanning "n" + // regions. + virtual void set_startsHumongous(); + + // The regions that continue a humongous sequence should be added using + // this method, in increasing address order. + void set_continuesHumongous(HeapRegion* start); + + void add_continuingHumongousRegion(HeapRegion* cont); + + // If the region has a remembered set, return a pointer to it. + HeapRegionRemSet* rem_set() const { + return _rem_set; + } + + // True iff the region is in current collection_set. + bool in_collection_set() const { + return _in_collection_set; + } + void set_in_collection_set(bool b) { + _in_collection_set = b; + } + HeapRegion* next_in_collection_set() { + assert(in_collection_set(), "should only invoke on member of CS."); + assert(_next_in_special_set == NULL || + _next_in_special_set->in_collection_set(), + "Malformed CS."); + return _next_in_special_set; + } + void set_next_in_collection_set(HeapRegion* r) { + assert(in_collection_set(), "should only invoke on member of CS."); + assert(r == NULL || r->in_collection_set(), "Malformed CS."); + _next_in_special_set = r; + } + + // True iff it is or has been an allocation region in the current + // collection pause. 
+ bool is_gc_alloc_region() const { + return _is_gc_alloc_region; + } + void set_is_gc_alloc_region(bool b) { + _is_gc_alloc_region = b; + } + HeapRegion* next_gc_alloc_region() { + assert(is_gc_alloc_region(), "should only invoke on member of CS."); + assert(_next_in_special_set == NULL || + _next_in_special_set->is_gc_alloc_region(), + "Malformed CS."); + return _next_in_special_set; + } + void set_next_gc_alloc_region(HeapRegion* r) { + assert(is_gc_alloc_region(), "should only invoke on member of CS."); + assert(r == NULL || r->is_gc_alloc_region(), "Malformed CS."); + _next_in_special_set = r; + } + + bool is_reserved() { + return popular(); + } + + bool is_on_free_list() { + return _is_on_free_list; + } + + void set_on_free_list(bool b) { + _is_on_free_list = b; + } + + HeapRegion* next_from_free_list() { + assert(is_on_free_list(), + "Should only invoke on free space."); + assert(_next_in_special_set == NULL || + _next_in_special_set->is_on_free_list(), + "Malformed Free List."); + return _next_in_special_set; + } + + void set_next_on_free_list(HeapRegion* r) { + assert(r == NULL || r->is_on_free_list(), "Malformed free list."); + _next_in_special_set = r; + } + + bool is_on_unclean_list() { + return _is_on_unclean_list; + } + + void set_on_unclean_list(bool b); + + HeapRegion* next_from_unclean_list() { + assert(is_on_unclean_list(), + "Should only invoke on unclean space."); + assert(_next_in_special_set == NULL || + _next_in_special_set->is_on_unclean_list(), + "Malformed unclean List."); + return _next_in_special_set; + } + + void set_next_on_unclean_list(HeapRegion* r); + + HeapRegion* get_next_young_region() { return _next_young_region; } + void set_next_young_region(HeapRegion* hr) { + _next_young_region = hr; + } + + // Allows logical separation between objects allocated before and after. + void save_marks(); + + // Reset HR stuff to default values. + void hr_clear(bool par, bool clear_space); + + void initialize(MemRegion mr, bool clear_space); + + // Ensure that "this" is zero-filled. + void ensure_zero_filled(); + // This one requires that the calling thread holds ZF_mon. + void ensure_zero_filled_locked(); + + // Get the start of the unmarked area in this region. + HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; } + HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; } + + // Apply "cl->do_oop" to (the addresses of) all reference fields in objects + // allocated in the current region before the last call to "save_mark". + void oop_before_save_marks_iterate(OopClosure* cl); + + // This call determines the "filter kind" argument that will be used for + // the next call to "new_dcto_cl" on this region with the "traditional" + // signature (i.e., the call below.) The default, in the absence of a + // preceding call to this method, is "NoFilterKind", and a call to this + // method is necessary for each such call, or else it reverts to the + // default. + // (This is really ugly, but all other methods I could think of changed a + // lot of main-line code for G1.) 
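// A minimal, standalone sketch of the "stash an argument for the next call"
// pattern described above, with hypothetical names (ClosureFactory, create)
// rather than the HotSpot interface: a one-shot setting is consumed by the
// next call and then reverts to the default, so callers that never set it
// keep the old behaviour.

#include <cassert>

enum ToyFilter { NoFilter, IntoCSFilter, OutOfRegionFilter };

class ClosureFactory {
  ToyFilter _next_filter;
public:
  ClosureFactory() : _next_filter(NoFilter) {}
  void set_next_filter(ToyFilter f) { _next_filter = f; }
  ToyFilter create() {                  // stands in for the next "new_dcto_cl" call
    ToyFilter used = _next_filter;
    _next_filter = NoFilter;            // one-shot: revert to the default
    return used;
  }
};

int main() {
  ClosureFactory f;
  assert(f.create() == NoFilter);       // default when nothing was stashed
  f.set_next_filter(IntoCSFilter);
  assert(f.create() == IntoCSFilter);   // the stashed value is used exactly once
  assert(f.create() == NoFilter);       // and the default is restored afterwards
  return 0;
}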
+ void set_next_filter_kind(HeapRegionDCTOC::FilterKind nfk) { + _next_fk = nfk; + } + + DirtyCardToOopClosure* + new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapRegionDCTOC::FilterKind fk); + +#if WHASSUP + DirtyCardToOopClosure* + new_dcto_closure(OopClosure* cl, + CardTableModRefBS::PrecisionStyle precision, + HeapWord* boundary) { + assert(boundary == NULL, "This arg doesn't make sense here."); + DirtyCardToOopClosure* res = new_dcto_closure(cl, precision, _next_fk); + _next_fk = HeapRegionDCTOC::NoFilterKind; + return res; + } +#endif + + // + // Note the start or end of marking. This tells the heap region + // that the collector is about to start or has finished (concurrently) + // marking the heap. + // + + // Note the start of a marking phase. Record the + // start of the unmarked area of the region here. + void note_start_of_marking(bool during_initial_mark) { + init_top_at_conc_mark_count(); + _next_marked_bytes = 0; + if (during_initial_mark && is_young() && !is_survivor()) + _next_top_at_mark_start = bottom(); + else + _next_top_at_mark_start = top(); + } + + // Note the end of a marking phase. Install the start of + // the unmarked area that was captured at start of marking. + void note_end_of_marking() { + _prev_top_at_mark_start = _next_top_at_mark_start; + _prev_marked_bytes = _next_marked_bytes; + _next_marked_bytes = 0; + + guarantee(_prev_marked_bytes <= + (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize, + "invariant"); + } + + // After an evacuation, we need to update _next_top_at_mark_start + // to be the current top. Note this is only valid if we have only + // ever evacuated into this region. If we evacuate, allocate, and + // then evacuate we are in deep doodoo. + void note_end_of_copying() { + assert(top() >= _next_top_at_mark_start, + "Increase only"); + _next_top_at_mark_start = top(); + } + + // Returns "false" iff no object in the region was allocated when the + // last mark phase ended. + bool is_marked() { return _prev_top_at_mark_start != bottom(); } + + // If "is_marked()" is true, then this is the index of the region in + // an array constructed at the end of marking of the regions in a + // "desirability" order. 
+ int sort_index() { + return _sort_index; + } + void set_sort_index(int i) { + _sort_index = i; + } + + void init_top_at_conc_mark_count() { + _top_at_conc_mark_count = bottom(); + } + + void set_top_at_conc_mark_count(HeapWord *cur) { + assert(bottom() <= cur && cur <= end(), "Sanity."); + _top_at_conc_mark_count = cur; + } + + HeapWord* top_at_conc_mark_count() { + return _top_at_conc_mark_count; + } + + void reset_during_compaction() { + guarantee( isHumongous() && startsHumongous(), + "should only be called for humongous regions"); + + zero_marked_bytes(); + init_top_at_mark_start(); + } + + bool popular() { return _popularity == Popular; } + void set_popular(bool b) { + if (b) { + _popularity = Popular; + } else { + _popularity = NotPopular; + } + } + bool popular_pending() { return _popularity == PopularPending; } + void set_popular_pending(bool b) { + if (b) { + _popularity = PopularPending; + } else { + _popularity = NotPopular; + } + } + + // + void calc_gc_efficiency(void); + double gc_efficiency() { return _gc_efficiency;} + // + + bool is_young() const { return _young_type != NotYoung; } + bool is_scan_only() const { return _young_type == ScanOnly; } + bool is_survivor() const { return _young_type == Survivor; } + + int young_index_in_cset() const { return _young_index_in_cset; } + void set_young_index_in_cset(int index) { + assert( (index == -1) || is_young(), "pre-condition" ); + _young_index_in_cset = index; + } + + int age_in_surv_rate_group() { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + return _surv_rate_group->age_in_group(_age_index); + } + + void recalculate_age_in_surv_rate_group() { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + _age_index = _surv_rate_group->recalculate_age_index(_age_index); + } + + void record_surv_words_in_group(size_t words_survived) { + assert( _surv_rate_group != NULL, "pre-condition" ); + assert( _age_index > -1, "pre-condition" ); + int age_in_group = age_in_surv_rate_group(); + _surv_rate_group->record_surviving_words(age_in_group, words_survived); + } + + int age_in_surv_rate_group_cond() { + if (_surv_rate_group != NULL) + return age_in_surv_rate_group(); + else + return -1; + } + + SurvRateGroup* surv_rate_group() { + return _surv_rate_group; + } + + void install_surv_rate_group(SurvRateGroup* surv_rate_group) { + assert( surv_rate_group != NULL, "pre-condition" ); + assert( _surv_rate_group == NULL, "pre-condition" ); + assert( is_young(), "pre-condition" ); + + _surv_rate_group = surv_rate_group; + _age_index = surv_rate_group->next_age_index(); + } + + void uninstall_surv_rate_group() { + if (_surv_rate_group != NULL) { + assert( _age_index > -1, "pre-condition" ); + assert( is_young(), "pre-condition" ); + + _surv_rate_group = NULL; + _age_index = -1; + } else { + assert( _age_index == -1, "pre-condition" ); + } + } + + void set_young() { set_young_type(Young); } + + void set_scan_only() { set_young_type(ScanOnly); } + + void set_survivor() { set_young_type(Survivor); } + + void set_not_young() { set_young_type(NotYoung); } + + // Determine if an object has been allocated since the last + // mark performed by the collector. This returns true iff the object + // is within the unmarked area of the region. 
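// A minimal, standalone sketch of the address test described above, with a
// hypothetical name (MarkedRegion) rather than the HotSpot class: allocation
// in a region is contiguous and only ever grows top, so anything at or above
// the top recorded when marking started must have been allocated after that mark.

#include <cassert>

struct MarkedRegion {
  char* bottom;
  char* top_at_mark_start;   // top when the last marking began
  char* top;                 // current allocation top

  bool allocated_since_mark(const void* obj) const {
    return (const char*)obj >= top_at_mark_start;
  }
};

int main() {
  char storage[64];
  MarkedRegion r = { storage, storage + 16, storage + 48 };
  assert(!r.allocated_since_mark(storage + 8));   // below the recorded top: old object
  assert( r.allocated_since_mark(storage + 32));  // at or above it: allocated since the mark
  return 0;
}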
+ bool obj_allocated_since_prev_marking(oop obj) const { + return (HeapWord *) obj >= prev_top_at_mark_start(); + } + bool obj_allocated_since_next_marking(oop obj) const { + return (HeapWord *) obj >= next_top_at_mark_start(); + } + + // For parallel heapRegion traversal. + bool claimHeapRegion(int claimValue); + jint claim_value() { return _claimed; } + // Use this carefully: only when you're sure no one is claiming... + void set_claim_value(int claimValue) { _claimed = claimValue; } + + // Returns the "evacuation_failed" property of the region. + bool evacuation_failed() { return _evacuation_failed; } + + // Sets the "evacuation_failed" property of the region. + void set_evacuation_failed(bool b) { + _evacuation_failed = b; + + if (b) { + init_top_at_conc_mark_count(); + _next_marked_bytes = 0; + } + } + + // Requires that "mr" be entirely within the region. + // Apply "cl->do_object" to all objects that intersect with "mr". + // If the iteration encounters an unparseable portion of the region, + // or if "cl->abort()" is true after a closure application, + // terminate the iteration and return the address of the start of the + // subregion that isn't done. (The two can be distinguished by querying + // "cl->abort()".) Return of "NULL" indicates that the iteration + // completed. + HeapWord* + object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl); + + HeapWord* + oops_on_card_seq_iterate_careful(MemRegion mr, + FilterOutOfRegionClosure* cl); + + // The region "mr" is entirely in "this", and starts and ends at block + // boundaries. The caller declares that all the contained blocks are + // coalesced into one. + void declare_filled_region_to_BOT(MemRegion mr) { + _offsets.single_block(mr.start(), mr.end()); + } + + // A version of block start that is guaranteed to find *some* block + // boundary at or before "p", but does not object iteration, and may + // therefore be used safely when the heap is unparseable. + HeapWord* block_start_careful(const void* p) const { + return _offsets.block_start_careful(p); + } + + // Requires that "addr" is within the region. Returns the start of the + // first ("careful") block that starts at or after "addr", or else the + // "end" of the region if there is no such block. + HeapWord* next_block_start_careful(HeapWord* addr); + + // Returns the zero-fill-state of the current region. + ZeroFillState zero_fill_state() { return (ZeroFillState)_zfs; } + bool zero_fill_is_allocated() { return _zfs == Allocated; } + Thread* zero_filler() { return _zero_filler; } + + // Indicate that the contents of the region are unknown, and therefore + // might require zero-filling. + void set_zero_fill_needed() { + set_zero_fill_state_work(NotZeroFilled); + } + void set_zero_fill_in_progress(Thread* t) { + set_zero_fill_state_work(ZeroFilling); + _zero_filler = t; + } + void set_zero_fill_complete(); + void set_zero_fill_allocated() { + set_zero_fill_state_work(Allocated); + } + + void set_zero_fill_state_work(ZeroFillState zfs); + + // This is called when a full collection shrinks the heap. + // We want to set the heap region to a value which says + // it is no longer part of the heap. For now, we'll let "NotZF" fill + // that role. 
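// A minimal, standalone sketch of the zero-fill lifecycle described above,
// with a hypothetical name (ZfRegion) rather than the HotSpot class: a region
// moves from NotZeroFilled through ZeroFilling to ZeroFilled and finally
// Allocated, and a heap shrink drops it back to NotZeroFilled ("contents unknown").

#include <cassert>
#include <cstring>

enum ToyZeroFillState { NotZeroFilled, ZeroFilling, ZeroFilled, Allocated };

struct ZfRegion {
  ToyZeroFillState zfs;
  char data[32];

  void ensure_zero_filled() {
    if (zfs == NotZeroFilled) {
      zfs = ZeroFilling;
      std::memset(data, 0, sizeof(data));   // the actual fill work
      zfs = ZeroFilled;
    }
  }
  void allocate_in()     { assert(zfs == ZeroFilled); zfs = Allocated; }
  void reset_zero_fill() { zfs = NotZeroFilled; }   // e.g. after a full-GC heap shrink
};

int main() {
  ZfRegion r;
  r.zfs = NotZeroFilled;
  r.ensure_zero_filled();
  r.allocate_in();
  r.reset_zero_fill();                      // contents are "unknown" again
  assert(r.zfs == NotZeroFilled);
  return 0;
}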
+ void reset_zero_fill() { + set_zero_fill_state_work(NotZeroFilled); + _zero_filler = NULL; + } + +#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix) \ + virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl); + SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL) + + CompactibleSpace* next_compaction_space() const; + + virtual void reset_after_compaction(); + + void print() const; + void print_on(outputStream* st) const; + + // Override + virtual void verify(bool allow_dirty) const; + +#ifdef DEBUG + HeapWord* allocate(size_t size); +#endif +}; + +// HeapRegionClosure is used for iterating over regions. +// Terminates the iteration when the "doHeapRegion" method returns "true". +class HeapRegionClosure : public StackObj { + friend class HeapRegionSeq; + friend class G1CollectedHeap; + + bool _complete; + void incomplete() { _complete = false; } + + public: + HeapRegionClosure(): _complete(true) {} + + // Typically called on each region until it returns true. + virtual bool doHeapRegion(HeapRegion* r) = 0; + + // True after iteration if the closure was applied to all heap regions + // and returned "false" in all cases. + bool complete() { return _complete; } +}; + +// A linked lists of heap regions. It leaves the "next" field +// unspecified; that's up to subtypes. +class RegionList { +protected: + virtual HeapRegion* get_next(HeapRegion* chr) = 0; + virtual void set_next(HeapRegion* chr, + HeapRegion* new_next) = 0; + + HeapRegion* _hd; + HeapRegion* _tl; + size_t _sz; + + // Protected constructor because this type is only meaningful + // when the _get/_set next functions are defined. + RegionList() : _hd(NULL), _tl(NULL), _sz(0) {} +public: + void reset() { + _hd = NULL; + _tl = NULL; + _sz = 0; + } + HeapRegion* hd() { return _hd; } + HeapRegion* tl() { return _tl; } + size_t sz() { return _sz; } + size_t length(); + + bool well_formed() { + return + ((hd() == NULL && tl() == NULL && sz() == 0) + || (hd() != NULL && tl() != NULL && sz() > 0)) + && (sz() == length()); + } + virtual void insert_before_head(HeapRegion* r); + void prepend_list(RegionList* new_list); + virtual HeapRegion* pop(); + void dec_sz() { _sz--; } + // Requires that "r" is an element of the list, and is not the tail. + void delete_after(HeapRegion* r); +}; + +class EmptyNonHRegionList: public RegionList { +protected: + // Protected constructor because this type is only meaningful + // when the _get/_set next functions are defined. 
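// A minimal, standalone sketch of an intrusive list in the style of RegionList
// above, with hypothetical names (ToyNode, SpecialList) rather than the HotSpot
// classes: the list stores no nodes of its own, and subclasses decide which
// field of the element serves as the "next" link.

#include <cassert>
#include <cstddef>

struct ToyNode { ToyNode* next_special; ToyNode() : next_special(NULL) {} };

class IntrusiveList {
protected:
  virtual ToyNode* get_next(ToyNode* n) = 0;
  virtual void     set_next(ToyNode* n, ToyNode* new_next) = 0;
  ToyNode* _hd;
  size_t   _sz;
  IntrusiveList() : _hd(NULL), _sz(0) {}
public:
  virtual ~IntrusiveList() {}
  void insert_before_head(ToyNode* n) { set_next(n, _hd); _hd = n; _sz++; }
  ToyNode* pop() {
    ToyNode* res = _hd;
    if (res != NULL) { _hd = get_next(res); set_next(res, NULL); _sz--; }
    return res;
  }
  size_t sz() const { return _sz; }
};

// One concrete flavour: the link lives in ToyNode::next_special.
class SpecialList : public IntrusiveList {
protected:
  ToyNode* get_next(ToyNode* n)              { return n->next_special; }
  void     set_next(ToyNode* n, ToyNode* nx) { n->next_special = nx; }
};

int main() {
  ToyNode a, b;
  SpecialList l;
  l.insert_before_head(&a);
  l.insert_before_head(&b);
  assert(l.pop() == &b);
  assert(l.pop() == &a);
  assert(l.sz() == 0);
  return 0;
}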
+ EmptyNonHRegionList() : RegionList() {} + +public: + void insert_before_head(HeapRegion* r) { + // assert(r->is_empty(), "Better be empty"); + assert(!r->isHumongous(), "Better not be humongous."); + RegionList::insert_before_head(r); + } + void prepend_list(EmptyNonHRegionList* new_list) { + // assert(new_list->hd() == NULL || new_list->hd()->is_empty(), + // "Better be empty"); + assert(new_list->hd() == NULL || !new_list->hd()->isHumongous(), + "Better not be humongous."); + // assert(new_list->tl() == NULL || new_list->tl()->is_empty(), + // "Better be empty"); + assert(new_list->tl() == NULL || !new_list->tl()->isHumongous(), + "Better not be humongous."); + RegionList::prepend_list(new_list); + } +}; + +class UncleanRegionList: public EmptyNonHRegionList { +public: + HeapRegion* get_next(HeapRegion* hr) { + return hr->next_from_unclean_list(); + } + void set_next(HeapRegion* hr, HeapRegion* new_next) { + hr->set_next_on_unclean_list(new_next); + } + + UncleanRegionList() : EmptyNonHRegionList() {} + + void insert_before_head(HeapRegion* r) { + assert(!r->is_on_free_list(), + "Better not already be on free list"); + assert(!r->is_on_unclean_list(), + "Better not already be on unclean list"); + r->set_zero_fill_needed(); + r->set_on_unclean_list(true); + EmptyNonHRegionList::insert_before_head(r); + } + void prepend_list(UncleanRegionList* new_list) { + assert(new_list->tl() == NULL || !new_list->tl()->is_on_free_list(), + "Better not already be on free list"); + assert(new_list->tl() == NULL || new_list->tl()->is_on_unclean_list(), + "Better already be marked as on unclean list"); + assert(new_list->hd() == NULL || !new_list->hd()->is_on_free_list(), + "Better not already be on free list"); + assert(new_list->hd() == NULL || new_list->hd()->is_on_unclean_list(), + "Better already be marked as on unclean list"); + EmptyNonHRegionList::prepend_list(new_list); + } + HeapRegion* pop() { + HeapRegion* res = RegionList::pop(); + if (res != NULL) res->set_on_unclean_list(false); + return res; + } +}; + +// Local Variables: *** +// c-indentation-style: gnu *** +// End: *** + +#endif // SERIALGC diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegion.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) { + HeapWord* res = ContiguousSpace::allocate(size); + if (res != NULL) { + _offsets.alloc_block(res, size); + } + return res; +} + +// Because of the requirement of keeping "_offsets" up to date with the +// allocations, we sequentialize these with a lock. Therefore, best if +// this is used for larger LAB allocations only. +inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) { + MutexLocker x(&_par_alloc_lock); + // This ought to be just "allocate", because of the lock above, but that + // ContiguousSpace::allocate asserts that either the allocating thread + // holds the heap lock or it is the VM thread and we're at a safepoint. + // The best I (dld) could figure was to put a field in ContiguousSpace + // meaning "locking at safepoint taken care of", and set/reset that + // here. But this will do for now, especially in light of the comment + // above. Perhaps in the future some lock-free manner of keeping the + // coordination. + HeapWord* res = ContiguousSpace::par_allocate(size); + if (res != NULL) { + _offsets.alloc_block(res, size); + } + return res; +} + +inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) { + return _offsets.block_start(p); +} + +inline HeapWord* +G1OffsetTableContigSpace::block_start_const(const void* p) const { + return _offsets.block_start_const(p); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,1443 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegionRemSet.cpp.incl" + +#define HRRS_VERBOSE 0 + +#define PRT_COUNT_OCCUPIED 1 + +// OtherRegionsTable + +class PerRegionTable: public CHeapObj { + friend class OtherRegionsTable; + friend class HeapRegionRemSetIterator; + + HeapRegion* _hr; + BitMap _bm; +#if PRT_COUNT_OCCUPIED + jint _occupied; +#endif + PerRegionTable* _next_free; + + PerRegionTable* next_free() { return _next_free; } + void set_next_free(PerRegionTable* prt) { _next_free = prt; } + + + static PerRegionTable* _free_list; + +#ifdef _MSC_VER + // For some reason even though the classes are marked as friend they are unable + // to access CardsPerRegion when private/protected. 
Only the windows c++ compiler + // says this Sun CC and linux gcc don't have a problem with access when private + + public: + +#endif // _MSC_VER + + enum SomePrivateConstants { + CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift + }; + +protected: + // We need access in order to union things into the base table. + BitMap* bm() { return &_bm; } + + void recount_occupied() { + _occupied = (jint) bm()->count_one_bits(); + } + + PerRegionTable(HeapRegion* hr) : + _hr(hr), +#if PRT_COUNT_OCCUPIED + _occupied(0), +#endif + _bm(CardsPerRegion, false /* in-resource-area */) + {} + + static void free(PerRegionTable* prt) { + while (true) { + PerRegionTable* fl = _free_list; + prt->set_next_free(fl); + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr(prt, &_free_list, fl); + if (res == fl) return; + } + ShouldNotReachHere(); + } + + static PerRegionTable* alloc(HeapRegion* hr) { + PerRegionTable* fl = _free_list; + while (fl != NULL) { + PerRegionTable* nxt = fl->next_free(); + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr); + return fl; + } else { + fl = _free_list; + } + } + assert(fl == NULL, "Loop condition."); + return new PerRegionTable(hr); + } + + void add_card_work(short from_card, bool par) { + if (!_bm.at(from_card)) { + if (par) { + if (_bm.par_at_put(from_card, 1)) { +#if PRT_COUNT_OCCUPIED + Atomic::inc(&_occupied); +#endif + } + } else { + _bm.at_put(from_card, 1); +#if PRT_COUNT_OCCUPIED + _occupied++; +#endif + } + } + } + + void add_reference_work(oop* from, bool par) { + // Must make this robust in case "from" is not in "_hr", because of + // concurrency. + +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").", + from, *from); +#endif + + HeapRegion* loc_hr = hr(); + // If the test below fails, then this table was reused concurrently + // with this operation. This is OK, since the old table was coarsened, + // and adding a bit to the new table is never incorrect. + if (loc_hr->is_in_reserved(from)) { + size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom()); + size_t from_card = + hw_offset >> + (CardTableModRefBS::card_shift - LogHeapWordSize); + + add_card_work((short) from_card, par); + } + } + +public: + + HeapRegion* hr() const { return _hr; } + +#if PRT_COUNT_OCCUPIED + jint occupied() const { + // Overkill, but if we ever need it... + // guarantee(_occupied == _bm.count_one_bits(), "Check"); + return _occupied; + } +#else + jint occupied() const { + return _bm.count_one_bits(); + } +#endif + + void init(HeapRegion* hr) { + _hr = hr; +#if PRT_COUNT_OCCUPIED + _occupied = 0; +#endif + _bm.clear(); + } + + void add_reference(oop* from) { + add_reference_work(from, /*parallel*/ true); + } + + void seq_add_reference(oop* from) { + add_reference_work(from, /*parallel*/ false); + } + + void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) { + HeapWord* hr_bot = hr()->bottom(); + int hr_first_card_index = ctbs->index_for(hr_bot); + bm()->set_intersection_at_offset(*card_bm, hr_first_card_index); +#if PRT_COUNT_OCCUPIED + recount_occupied(); +#endif + } + + void add_card(short from_card_index) { + add_card_work(from_card_index, /*parallel*/ true); + } + + void seq_add_card(short from_card_index) { + add_card_work(from_card_index, /*parallel*/ false); + } + + // (Destructively) union the bitmap of the current table into the given + // bitmap (which is assumed to be of the same size.) 
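PerRegionTable::alloc and PerRegionTable::free above recycle tables through a lock-free LIFO free list: free pushes a table with a compare-and-swap on the list head, and alloc tries to pop before falling back to a fresh allocation. The following is only a rough standalone sketch of that pattern, written with std::atomic rather than HotSpot's Atomic::cmpxchg_ptr; it also glosses over the ABA subtleties that any compare-and-swap free list raises (the comments in heapRegionRemSet.hpp below explain when tables are actually reclaimed in the real collector):

#include <atomic>
#include <cassert>

// Simplified stand-in for PerRegionTable; only the free-list link matters here.
struct Node {
  Node* next_free;
  Node() : next_free(nullptr) {}
};

static std::atomic<Node*> free_list(nullptr);

// Push, as in PerRegionTable::free: retry until the head we linked to is
// still the head when we swing it to our node.
void free_node(Node* n) {
  Node* head = free_list.load(std::memory_order_relaxed);
  do {
    n->next_free = head;
  } while (!free_list.compare_exchange_weak(head, n,
                                            std::memory_order_release,
                                            std::memory_order_relaxed));
}

// Pop, as in PerRegionTable::alloc: claim the head if we can, otherwise fall
// back to a fresh allocation.
Node* alloc_node() {
  Node* head = free_list.load(std::memory_order_acquire);
  while (head != nullptr) {
    Node* next = head->next_free;
    if (free_list.compare_exchange_weak(head, next,
                                        std::memory_order_acquire,
                                        std::memory_order_acquire)) {
      return head;                           // unlinked it; the caller re-inits it
    }
    // head has been reloaded by compare_exchange_weak; try again
  }
  return new Node();                         // free list empty
}

int main() {
  Node a;
  free_node(&a);
  Node* got = alloc_node();                  // reuses the pushed node
  assert(got == &a);
  Node* fresh = alloc_node();                // list empty: allocates a new one
  assert(fresh != &a);
  delete fresh;
  return 0;
}

The union_bitmap_into helper introduced by the comment above follows next; it is what par_contract later uses to fold per-thread tables back into the base table.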
+ void union_bitmap_into(BitMap* bm) { + bm->set_union(_bm); + } + + // Mem size in bytes. + size_t mem_size() const { + return sizeof(this) + _bm.size_in_words() * HeapWordSize; + } + + static size_t fl_mem_size() { + PerRegionTable* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += sizeof(PerRegionTable); + cur = cur->next_free(); + } + return res; + } + + // Requires "from" to be in "hr()". + bool contains_reference(oop* from) const { + assert(hr()->is_in_reserved(from), "Precondition."); + size_t card_ind = pointer_delta(from, hr()->bottom(), + CardTableModRefBS::card_size); + return _bm.at(card_ind); + } +}; + +PerRegionTable* PerRegionTable::_free_list = NULL; + + +#define COUNT_PAR_EXPANDS 0 + +#if COUNT_PAR_EXPANDS +static jint n_par_expands = 0; +static jint n_par_contracts = 0; +static jint par_expand_list_len = 0; +static jint max_par_expand_list_len = 0; + +static void print_par_expand() { + Atomic::inc(&n_par_expands); + Atomic::inc(&par_expand_list_len); + if (par_expand_list_len > max_par_expand_list_len) { + max_par_expand_list_len = par_expand_list_len; + } + if ((n_par_expands % 10) == 0) { + gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, " + "len = %d, max_len = %d\n.", + n_par_expands, n_par_contracts, par_expand_list_len, + max_par_expand_list_len); + } +} +#endif + +class PosParPRT: public PerRegionTable { + PerRegionTable** _par_tables; + + enum SomePrivateConstants { + ReserveParTableExpansion = 1 + }; + + void par_expand() { + int n = HeapRegionRemSet::num_par_rem_sets()-1; + if (n <= 0) return; + if (_par_tables == NULL) { + PerRegionTable* res = + (PerRegionTable*) + Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion, + &_par_tables, NULL); + if (res != NULL) return; + // Otherwise, we reserved the right to do the expansion. + + PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n); + for (int i = 0; i < n; i++) { + PerRegionTable* ptable = PerRegionTable::alloc(hr()); + ptables[i] = ptable; + } + // Here we do not need an atomic. + _par_tables = ptables; +#if COUNT_PAR_EXPANDS + print_par_expand(); +#endif + // We must put this table on the expanded list. + PosParPRT* exp_head = _par_expanded_list; + while (true) { + set_next_par_expanded(exp_head); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head); + if (res == exp_head) return; + // Otherwise. + exp_head = res; + } + ShouldNotReachHere(); + } + } + + void par_contract() { + assert(_par_tables != NULL, "Precondition."); + int n = HeapRegionRemSet::num_par_rem_sets()-1; + for (int i = 0; i < n; i++) { + _par_tables[i]->union_bitmap_into(bm()); + PerRegionTable::free(_par_tables[i]); + _par_tables[i] = NULL; + } +#if PRT_COUNT_OCCUPIED + // We must recount the "occupied." 
+ recount_occupied(); +#endif + FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables); + _par_tables = NULL; +#if COUNT_PAR_EXPANDS + Atomic::inc(&n_par_contracts); + Atomic::dec(&par_expand_list_len); +#endif + } + + static PerRegionTable** _par_table_fl; + + PosParPRT* _next; + + static PosParPRT* _free_list; + + PerRegionTable** par_tables() const { + assert(uintptr_t(NULL) == 0, "Assumption."); + if (uintptr_t(_par_tables) <= ReserveParTableExpansion) + return NULL; + else + return _par_tables; + } + + PosParPRT* _next_par_expanded; + PosParPRT* next_par_expanded() { return _next_par_expanded; } + void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; } + static PosParPRT* _par_expanded_list; + +public: + + PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {} + + jint occupied() const { + jint res = PerRegionTable::occupied(); + if (par_tables() != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + res += par_tables()[i]->occupied(); + } + } + return res; + } + + void init(HeapRegion* hr) { + PerRegionTable::init(hr); + _next = NULL; + if (par_tables() != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + par_tables()[i]->init(hr); + } + } + } + + static void free(PosParPRT* prt) { + while (true) { + PosParPRT* fl = _free_list; + prt->set_next(fl); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(prt, &_free_list, fl); + if (res == fl) return; + } + ShouldNotReachHere(); + } + + static PosParPRT* alloc(HeapRegion* hr) { + PosParPRT* fl = _free_list; + while (fl != NULL) { + PosParPRT* nxt = fl->next(); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(nxt, &_free_list, fl); + if (res == fl) { + fl->init(hr); + return fl; + } else { + fl = _free_list; + } + } + assert(fl == NULL, "Loop condition."); + return new PosParPRT(hr); + } + + PosParPRT* next() const { return _next; } + void set_next(PosParPRT* nxt) { _next = nxt; } + PosParPRT** next_addr() { return &_next; } + + void add_reference(oop* from, int tid) { + // Expand if necessary. + PerRegionTable** pt = par_tables(); + if (par_tables() == NULL && tid > 0 && hr()->is_gc_alloc_region()) { + par_expand(); + pt = par_tables(); + } + if (pt != NULL) { + // We always have to assume that mods to table 0 are in parallel, + // because of the claiming scheme in parallel expansion. A thread + // with tid != 0 that finds the table to be NULL, but doesn't succeed + // in claiming the right of expanding it, will end up in the else + // clause of the above if test. That thread could be delayed, and a + // thread 0 add reference could see the table expanded, and come + // here. Both threads would be adding in parallel. But we get to + // not use atomics for tids > 0. + if (tid == 0) { + PerRegionTable::add_reference(from); + } else { + pt[tid-1]->seq_add_reference(from); + } + } else { + // Not expanded -- add to the base table. 
+ PerRegionTable::add_reference(from); + } + } + + void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) { + assert(_par_tables == NULL, "Precondition"); + PerRegionTable::scrub(ctbs, card_bm); + } + + size_t mem_size() const { + size_t res = + PerRegionTable::mem_size() + sizeof(this) - sizeof(PerRegionTable); + if (_par_tables != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + res += _par_tables[i]->mem_size(); + } + } + return res; + } + + static size_t fl_mem_size() { + PosParPRT* cur = _free_list; + size_t res = 0; + while (cur != NULL) { + res += sizeof(PosParPRT); + cur = cur->next(); + } + return res; + } + + bool contains_reference(oop* from) const { + if (PerRegionTable::contains_reference(from)) return true; + if (_par_tables != NULL) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) { + if (_par_tables[i]->contains_reference(from)) return true; + } + } + return false; + } + + static void par_contract_all(); + +}; + +void PosParPRT::par_contract_all() { + PosParPRT* hd = _par_expanded_list; + while (hd != NULL) { + PosParPRT* nxt = hd->next_par_expanded(); + PosParPRT* res = + (PosParPRT*) + Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd); + if (res == hd) { + // We claimed the right to contract this table. + hd->set_next_par_expanded(NULL); + hd->par_contract(); + hd = _par_expanded_list; + } else { + hd = res; + } + } +} + +PosParPRT* PosParPRT::_free_list = NULL; +PosParPRT* PosParPRT::_par_expanded_list = NULL; + +jint OtherRegionsTable::_cache_probes = 0; +jint OtherRegionsTable::_cache_hits = 0; + +size_t OtherRegionsTable::_max_fine_entries = 0; +size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0; +#if SAMPLE_FOR_EVICTION +size_t OtherRegionsTable::_fine_eviction_stride = 0; +size_t OtherRegionsTable::_fine_eviction_sample_size = 0; +#endif + +OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) : + _g1h(G1CollectedHeap::heap()), + _m(Mutex::leaf, "An OtherRegionsTable lock", true), + _hr(hr), + _coarse_map(G1CollectedHeap::heap()->max_regions(), + false /* in-resource-area */), + _fine_grain_regions(NULL), + _n_fine_entries(0), _n_coarse_entries(0), +#if SAMPLE_FOR_EVICTION + _fine_eviction_start(0), +#endif + _sparse_table(hr) +{ + typedef PosParPRT* PosParPRTPtr; + if (_max_fine_entries == 0) { + assert(_mod_max_fine_entries_mask == 0, "Both or none."); + _max_fine_entries = (1 << G1LogRSRegionEntries); + _mod_max_fine_entries_mask = _max_fine_entries - 1; +#if SAMPLE_FOR_EVICTION + assert(_fine_eviction_sample_size == 0 + && _fine_eviction_stride == 0, "All init at same time."); + _fine_eviction_sample_size = MAX2((size_t)4, (size_t)G1LogRSRegionEntries); + _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size; +#endif + } + _fine_grain_regions = new PosParPRTPtr[_max_fine_entries]; + if (_fine_grain_regions == NULL) + vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries, + "Failed to allocate _fine_grain_entries."); + for (size_t i = 0; i < _max_fine_entries; i++) { + _fine_grain_regions[i] = NULL; + } +} + +int** OtherRegionsTable::_from_card_cache = NULL; +size_t OtherRegionsTable::_from_card_cache_max_regions = 0; +size_t OtherRegionsTable::_from_card_cache_mem_size = 0; + +void OtherRegionsTable::init_from_card_cache(size_t max_regions) { + _from_card_cache_max_regions = max_regions; + + int n_par_rs = HeapRegionRemSet::num_par_rem_sets(); + _from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs); + for (int i = 0; i < n_par_rs; i++) { + _from_card_cache[i] = NEW_C_HEAP_ARRAY(int, 
max_regions); + for (size_t j = 0; j < max_regions; j++) { + _from_card_cache[i][j] = -1; // An invalid value. + } + } + _from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int); +} + +void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max."); + for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) { + _from_card_cache[i][j] = -1; // An invalid value. + } + } +} + +#ifndef PRODUCT +void OtherRegionsTable::print_from_card_cache() { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + for (size_t j = 0; j < _from_card_cache_max_regions; j++) { + gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.", + i, j, _from_card_cache[i][j]); + } + } +} +#endif + +void OtherRegionsTable::add_reference(oop* from, int tid) { + size_t cur_hrs_ind = hr()->hrs_index(); + +#if HRRS_VERBOSE + gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").", + from, *from); +#endif + + int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift); + +#if HRRS_VERBOSE + gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)", + hr()->bottom(), from_card, + _from_card_cache[tid][cur_hrs_ind]); +#endif + +#define COUNT_CACHE 0 +#if COUNT_CACHE + jint p = Atomic::add(1, &_cache_probes); + if ((p % 10000) == 0) { + jint hits = _cache_hits; + gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.", + _cache_hits, p, 100.0* (float)hits/(float)p); + } +#endif + if (from_card == _from_card_cache[tid][cur_hrs_ind]) { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" from-card cache hit."); +#endif +#if COUNT_CACHE + Atomic::inc(&_cache_hits); +#endif + assert(contains_reference(from), "We just added it!"); + return; + } else { + _from_card_cache[tid][cur_hrs_ind] = from_card; + } + + // Note that this may be a continued H region. + HeapRegion* from_hr = _g1h->heap_region_containing_raw(from); + size_t from_hrs_ind = (size_t)from_hr->hrs_index(); + + // If the region is already coarsened, return. + if (_coarse_map.at(from_hrs_ind)) { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" coarse map hit."); +#endif + assert(contains_reference(from), "We just added it!"); + return; + } + + // Otherwise find a per-region table to add it to. + size_t ind = from_hrs_ind & _mod_max_fine_entries_mask; + PosParPRT* prt = find_region_table(ind, from_hr); + if (prt == NULL) { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + // Confirm that it's really not there... + prt = find_region_table(ind, from_hr); + if (prt == NULL) { + + uintptr_t from_hr_bot_card_index = + uintptr_t(from_hr->bottom()) + >> CardTableModRefBS::card_shift; + int card_index = from_card - from_hr_bot_card_index; + assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion, + "Must be in range."); + if (G1HRRSUseSparseTable && + _sparse_table.add_card((short) from_hrs_ind, card_index)) { + if (G1RecordHRRSOops) { + HeapRegionRemSet::record(hr(), from); +#if HRRS_VERBOSE + gclog_or_tty->print(" Added card " PTR_FORMAT " to region " + "[" PTR_FORMAT "...) 
for ref " PTR_FORMAT ".\n", + align_size_down(uintptr_t(from), + CardTableModRefBS::card_size), + hr()->bottom(), from); +#endif + } +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" added card to sparse table."); +#endif + assert(contains_reference_locked(from), "We just added it!"); + return; + } else { +#if HRRS_VERBOSE + gclog_or_tty->print_cr(" [tid %d] sparse table entry " + "overflow(f: %d, t: %d)", + tid, from_hrs_ind, cur_hrs_ind); +#endif + } + + // Otherwise, transfer from sparse to fine-grain. + short cards[SparsePRTEntry::CardsPerEntry]; + if (G1HRRSUseSparseTable) { + bool res = _sparse_table.get_cards((short) from_hrs_ind, &cards[0]); + assert(res, "There should have been an entry"); + } + + if (_n_fine_entries == _max_fine_entries) { + prt = delete_region_table(); + } else { + prt = PosParPRT::alloc(from_hr); + } + prt->init(from_hr); + // Record the outgoing pointer in the from_region's outgoing bitmap. + from_hr->rem_set()->add_outgoing_reference(hr()); + + PosParPRT* first_prt = _fine_grain_regions[ind]; + prt->set_next(first_prt); // XXX Maybe move to init? + _fine_grain_regions[ind] = prt; + _n_fine_entries++; + + // Add in the cards from the sparse table. + if (G1HRRSUseSparseTable) { + for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) { + short c = cards[i]; + if (c != SparsePRTEntry::NullEntry) { + prt->add_card(c); + } + } + // Now we can delete the sparse entry. + bool res = _sparse_table.delete_entry((short) from_hrs_ind); + assert(res, "It should have been there."); + } + } + assert(prt != NULL && prt->hr() == from_hr, "consequence"); + } + // Note that we can't assert "prt->hr() == from_hr", because of the + // possibility of concurrent reuse. But see head comment of + // OtherRegionsTable for why this is OK. + assert(prt != NULL, "Inv"); + + prt->add_reference(from, tid); + if (G1RecordHRRSOops) { + HeapRegionRemSet::record(hr(), from); +#if HRRS_VERBOSE + gclog_or_tty->print("Added card " PTR_FORMAT " to region " + "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n", + align_size_down(uintptr_t(from), + CardTableModRefBS::card_size), + hr()->bottom(), from); +#endif + } + assert(contains_reference(from), "We just added it!"); +} + +PosParPRT* +OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const { + assert(0 <= ind && ind < _max_fine_entries, "Preconditions."); + PosParPRT* prt = _fine_grain_regions[ind]; + while (prt != NULL && prt->hr() != hr) { + prt = prt->next(); + } + // Loop postcondition is the method postcondition. + return prt; +} + + +#define DRT_CENSUS 0 + +#if DRT_CENSUS +static const int HistoSize = 6; +static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 }; +static int coarsenings = 0; +static int occ_sum = 0; +#endif + +jint OtherRegionsTable::_n_coarsenings = 0; + +PosParPRT* OtherRegionsTable::delete_region_table() { +#if DRT_CENSUS + int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 }; + const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 }; +#endif + + assert(_m.owned_by_self(), "Precondition"); + assert(_n_fine_entries == _max_fine_entries, "Precondition"); + PosParPRT* max = NULL; + jint max_occ = 0; + PosParPRT** max_prev; + size_t max_ind; + +#if SAMPLE_FOR_EVICTION + size_t i = _fine_eviction_start; + for (size_t k = 0; k < _fine_eviction_sample_size; k++) { + size_t ii = i; + // Make sure we get a non-NULL sample. 
+ while (_fine_grain_regions[ii] == NULL) { + ii++; + if (ii == _max_fine_entries) ii = 0; + guarantee(ii != i, "We must find one."); + } + PosParPRT** prev = &_fine_grain_regions[ii]; + PosParPRT* cur = *prev; + while (cur != NULL) { + jint cur_occ = cur->occupied(); + if (max == NULL || cur_occ > max_occ) { + max = cur; + max_prev = prev; + max_ind = i; + max_occ = cur_occ; + } + prev = cur->next_addr(); + cur = cur->next(); + } + i = i + _fine_eviction_stride; + if (i >= _n_fine_entries) i = i - _n_fine_entries; + } + _fine_eviction_start++; + if (_fine_eviction_start >= _n_fine_entries) + _fine_eviction_start -= _n_fine_entries; +#else + for (int i = 0; i < _max_fine_entries; i++) { + PosParPRT** prev = &_fine_grain_regions[i]; + PosParPRT* cur = *prev; + while (cur != NULL) { + jint cur_occ = cur->occupied(); +#if DRT_CENSUS + for (int k = 0; k < HistoSize; k++) { + if (cur_occ <= histo_limits[k]) { + histo[k]++; global_histo[k]++; break; + } + } +#endif + if (max == NULL || cur_occ > max_occ) { + max = cur; + max_prev = prev; + max_ind = i; + max_occ = cur_occ; + } + prev = cur->next_addr(); + cur = cur->next(); + } + } +#endif + // XXX + guarantee(max != NULL, "Since _n_fine_entries > 0"); +#if DRT_CENSUS + gclog_or_tty->print_cr("In a coarsening: histo of occs:"); + for (int k = 0; k < HistoSize; k++) { + gclog_or_tty->print_cr(" <= %4d: %5d.", histo_limits[k], histo[k]); + } + coarsenings++; + occ_sum += max_occ; + if ((coarsenings % 100) == 0) { + gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings); + for (int k = 0; k < HistoSize; k++) { + gclog_or_tty->print_cr(" <= %4d: %5d.", histo_limits[k], global_histo[k]); + } + gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.", + (float)occ_sum/(float)coarsenings); + } +#endif + + // Set the corresponding coarse bit. + int max_hrs_index = max->hr()->hrs_index(); + if (!_coarse_map.at(max_hrs_index)) { + _coarse_map.at_put(max_hrs_index, true); + _n_coarse_entries++; +#if 0 + gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] " + "for region [" PTR_FORMAT "...] (%d coarse entries).\n", + hr()->bottom(), + max->hr()->bottom(), + _n_coarse_entries); +#endif + } + + // Unsplice. + *max_prev = max->next(); + Atomic::inc(&_n_coarsenings); + _n_fine_entries--; + return max; +} + + +// At present, this must be called stop-world single-threaded. +void OtherRegionsTable::scrub(CardTableModRefBS* ctbs, + BitMap* region_bm, BitMap* card_bm) { + // First eliminated garbage regions from the coarse map. + if (G1RSScrubVerbose) + gclog_or_tty->print_cr("Scrubbing region %d:", hr()->hrs_index()); + + assert(_coarse_map.size() == region_bm->size(), "Precondition"); + if (G1RSScrubVerbose) + gclog_or_tty->print(" Coarse map: before = %d...", _n_coarse_entries); + _coarse_map.set_intersection(*region_bm); + _n_coarse_entries = _coarse_map.count_one_bits(); + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" after = %d.", _n_coarse_entries); + + // Now do the fine-grained maps. + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + PosParPRT** prev = &_fine_grain_regions[i]; + while (cur != NULL) { + PosParPRT* nxt = cur->next(); + // If the entire region is dead, eliminate. 
+ if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" For other region %d:", cur->hr()->hrs_index()); + if (!region_bm->at(cur->hr()->hrs_index())) { + *prev = nxt; + cur->set_next(NULL); + _n_fine_entries--; + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" deleted via region map."); + PosParPRT::free(cur); + } else { + // Do fine-grain elimination. + if (G1RSScrubVerbose) + gclog_or_tty->print(" occ: before = %4d.", cur->occupied()); + cur->scrub(ctbs, card_bm); + if (G1RSScrubVerbose) + gclog_or_tty->print_cr(" after = %4d.", cur->occupied()); + // Did that empty the table completely? + if (cur->occupied() == 0) { + *prev = nxt; + cur->set_next(NULL); + _n_fine_entries--; + PosParPRT::free(cur); + } else { + prev = cur->next_addr(); + } + } + cur = nxt; + } + } + // Since we may have deleted a from_card_cache entry from the RS, clear + // the FCC. + clear_fcc(); +} + + +size_t OtherRegionsTable::occupied() const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + size_t sum = occ_fine(); + sum += occ_sparse(); + sum += occ_coarse(); + return sum; +} + +size_t OtherRegionsTable::occ_fine() const { + size_t sum = 0; + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + sum += cur->occupied(); + cur = cur->next(); + } + } + return sum; +} + +size_t OtherRegionsTable::occ_coarse() const { + return (_n_coarse_entries * PosParPRT::CardsPerRegion); +} + +size_t OtherRegionsTable::occ_sparse() const { + return _sparse_table.occupied(); +} + +size_t OtherRegionsTable::mem_size() const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + size_t sum = 0; + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + sum += cur->mem_size(); + cur = cur->next(); + } + } + sum += (sizeof(PosParPRT*) * _max_fine_entries); + sum += (_coarse_map.size_in_words() * HeapWordSize); + sum += (_sparse_table.mem_size()); + sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above. + return sum; +} + +size_t OtherRegionsTable::static_mem_size() { + return _from_card_cache_mem_size; +} + +size_t OtherRegionsTable::fl_mem_size() { + return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size(); +} + +void OtherRegionsTable::clear_fcc() { + for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) { + _from_card_cache[i][hr()->hrs_index()] = -1; + } +} + +void OtherRegionsTable::clear() { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + for (size_t i = 0; i < _max_fine_entries; i++) { + PosParPRT* cur = _fine_grain_regions[i]; + while (cur != NULL) { + PosParPRT* nxt = cur->next(); + PosParPRT::free(cur); + cur = nxt; + } + _fine_grain_regions[i] = NULL; + } + _sparse_table.clear(); + _coarse_map.clear(); + _n_fine_entries = 0; + _n_coarse_entries = 0; + + clear_fcc(); +} + +void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) { + MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag); + size_t hrs_ind = (size_t)from_hr->hrs_index(); + size_t ind = hrs_ind & _mod_max_fine_entries_mask; + if (del_single_region_table(ind, from_hr)) { + assert(!_coarse_map.at(hrs_ind), "Inv"); + } else { + _coarse_map.par_at_put(hrs_ind, 0); + } + // Check to see if any of the fcc entries come from here. 
+ int hr_ind = hr()->hrs_index(); + for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) { + int fcc_ent = _from_card_cache[tid][hr_ind]; + if (fcc_ent != -1) { + HeapWord* card_addr = (HeapWord*) + (uintptr_t(fcc_ent) << CardTableModRefBS::card_shift); + if (hr()->is_in_reserved(card_addr)) { + // Clear the from card cache. + _from_card_cache[tid][hr_ind] = -1; + } + } + } +} + +bool OtherRegionsTable::del_single_region_table(size_t ind, + HeapRegion* hr) { + assert(0 <= ind && ind < _max_fine_entries, "Preconditions."); + PosParPRT** prev_addr = &_fine_grain_regions[ind]; + PosParPRT* prt = *prev_addr; + while (prt != NULL && prt->hr() != hr) { + prev_addr = prt->next_addr(); + prt = prt->next(); + } + if (prt != NULL) { + assert(prt->hr() == hr, "Loop postcondition."); + *prev_addr = prt->next(); + PosParPRT::free(prt); + _n_fine_entries--; + return true; + } else { + return false; + } +} + +bool OtherRegionsTable::contains_reference(oop* from) const { + // Cast away const in this case. + MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag); + return contains_reference_locked(from); +} + +bool OtherRegionsTable::contains_reference_locked(oop* from) const { + HeapRegion* hr = _g1h->heap_region_containing_raw(from); + if (hr == NULL) return false; + size_t hr_ind = hr->hrs_index(); + // Is this region in the coarse map? + if (_coarse_map.at(hr_ind)) return true; + + PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask, + hr); + if (prt != NULL) { + return prt->contains_reference(from); + + } else { + uintptr_t from_card = + (uintptr_t(from) >> CardTableModRefBS::card_shift); + uintptr_t hr_bot_card_index = + uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift; + assert(from_card >= hr_bot_card_index, "Inv"); + int card_index = from_card - hr_bot_card_index; + return _sparse_table.contains_card((short)hr_ind, card_index); + } + + +} + + +bool HeapRegionRemSet::_par_traversal = false; + +void HeapRegionRemSet::set_par_traversal(bool b) { + assert(_par_traversal != b, "Proper alternation..."); + _par_traversal = b; +} + +int HeapRegionRemSet::num_par_rem_sets() { + // We always have at least two, so that a mutator thread can claim an + // id and add to a rem set. 
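The from-card cache being consulted and cleared above is a simple duplicate filter: for each (thread id, owning region) pair it remembers the last card whose reference was recorded, so repeated stores into the same card can skip the remembered set machinery entirely. A small standalone sketch of the idea, assuming 512-byte cards (the constant, class and field names here are illustrative, not HotSpot's):

#include <cstdint>
#include <vector>

// Assumed card geometry: 512-byte cards, i.e. a shift of 9.
static const int kCardShift = 9;

// Convert an interior pointer to a global card index.
inline intptr_t card_index_for(const void* p) {
  return reinterpret_cast<intptr_t>(p) >> kCardShift;
}

// Per (thread, region) cache of the last card recorded; -1 means "none".
struct FromCardCache {
  std::vector<std::vector<intptr_t> > last_card;

  FromCardCache(int threads, int regions)
    : last_card(threads, std::vector<intptr_t>(regions, -1)) {}

  // Returns true if the reference still needs to go into the remembered set,
  // false if it lands on the same card as the previous add from this thread.
  bool should_record(int tid, int region, const void* from) {
    intptr_t card = card_index_for(from);
    if (last_card[tid][region] == card) return false;  // duplicate, filtered
    last_card[tid][region] = card;
    return true;
  }
};

int main() {
  alignas(512) static int words[16];
  FromCardCache fcc(2 /* threads */, 8 /* regions */);
  bool first  = fcc.should_record(0, 3, &words[0]);  // new card: must record
  bool second = fcc.should_record(0, 3, &words[1]);  // same card: filtered out
  return (first && !second) ? 0 : 1;
}

Because mutator threads as well as GC worker threads need such a per-thread slot, num_par_rem_sets() below never returns fewer than two ids.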
+ return (int) MAX2(ParallelGCThreads, (size_t)2); +} + +HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, + HeapRegion* hr) + : _bosa(bosa), _other_regions(hr), + _outgoing_region_map(G1CollectedHeap::heap()->max_regions(), + false /* in-resource-area */), + _iter_state(Unclaimed) +{} + + +void HeapRegionRemSet::init_for_par_iteration() { + _iter_state = Unclaimed; +} + +bool HeapRegionRemSet::claim_iter() { + if (_iter_state != Unclaimed) return false; + jint res = Atomic::cmpxchg(Claimed, (jint*)(&_iter_state), Unclaimed); + return (res == Unclaimed); +} + +void HeapRegionRemSet::set_iter_complete() { + _iter_state = Complete; +} + +bool HeapRegionRemSet::iter_is_complete() { + return _iter_state == Complete; +} + + +void HeapRegionRemSet::init_iterator(HeapRegionRemSetIterator* iter) const { + iter->initialize(this); +} + +#ifndef PRODUCT +void HeapRegionRemSet::print() const { + HeapRegionRemSetIterator iter; + init_iterator(&iter); + size_t card_index; + while (iter.has_next(card_index)) { + HeapWord* card_start = + G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index); + gclog_or_tty->print_cr(" Card " PTR_FORMAT ".", card_start); + } + // XXX + if (iter.n_yielded() != occupied()) { + gclog_or_tty->print_cr("Yielded disagrees with occupied:"); + gclog_or_tty->print_cr(" %6d yielded (%6d coarse, %6d fine).", + iter.n_yielded(), + iter.n_yielded_coarse(), iter.n_yielded_fine()); + gclog_or_tty->print_cr(" %6d occ (%6d coarse, %6d fine).", + occupied(), occ_coarse(), occ_fine()); + } + guarantee(iter.n_yielded() == occupied(), + "We should have yielded all the represented cards."); +} +#endif + +void HeapRegionRemSet::cleanup() { + SparsePRT::cleanup_all(); +} + +void HeapRegionRemSet::par_cleanup() { + PosParPRT::par_contract_all(); +} + +void HeapRegionRemSet::add_outgoing_reference(HeapRegion* to_hr) { + _outgoing_region_map.par_at_put(to_hr->hrs_index(), 1); +} + +void HeapRegionRemSet::clear() { + clear_outgoing_entries(); + _outgoing_region_map.clear(); + _other_regions.clear(); + assert(occupied() == 0, "Should be clear."); +} + +void HeapRegionRemSet::clear_outgoing_entries() { + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + size_t i = _outgoing_region_map.get_next_one_offset(0); + while (i < _outgoing_region_map.size()) { + HeapRegion* to_region = g1h->region_at(i); + to_region->rem_set()->clear_incoming_entry(hr()); + i = _outgoing_region_map.get_next_one_offset(i+1); + } +} + + +void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs, + BitMap* region_bm, BitMap* card_bm) { + _other_regions.scrub(ctbs, region_bm, card_bm); +} + +//-------------------- Iteration -------------------- + +HeapRegionRemSetIterator:: +HeapRegionRemSetIterator() : + _hrrs(NULL), + _g1h(G1CollectedHeap::heap()), + _bosa(NULL), + _sparse_iter(size_t(G1CollectedHeap::heap()->reserved_region().start()) + >> CardTableModRefBS::card_shift) +{} + +void HeapRegionRemSetIterator::initialize(const HeapRegionRemSet* hrrs) { + _hrrs = hrrs; + _coarse_map = &_hrrs->_other_regions._coarse_map; + _fine_grain_regions = _hrrs->_other_regions._fine_grain_regions; + _bosa = _hrrs->bosa(); + + _is = Sparse; + // Set these values so that we increment to the first region. 
+ _coarse_cur_region_index = -1; + _coarse_cur_region_cur_card = (PosParPRT::CardsPerRegion-1);; + + _cur_region_cur_card = 0; + + _fine_array_index = -1; + _fine_cur_prt = NULL; + + _n_yielded_coarse = 0; + _n_yielded_fine = 0; + _n_yielded_sparse = 0; + + _sparse_iter.init(&hrrs->_other_regions._sparse_table); +} + +bool HeapRegionRemSetIterator::coarse_has_next(size_t& card_index) { + if (_hrrs->_other_regions._n_coarse_entries == 0) return false; + // Go to the next card. + _coarse_cur_region_cur_card++; + // Was the last the last card in the current region? + if (_coarse_cur_region_cur_card == PosParPRT::CardsPerRegion) { + // Yes: find the next region. This may leave _coarse_cur_region_index + // Set to the last index, in which case there are no more coarse + // regions. + _coarse_cur_region_index = + (int) _coarse_map->get_next_one_offset(_coarse_cur_region_index + 1); + if ((size_t)_coarse_cur_region_index < _coarse_map->size()) { + _coarse_cur_region_cur_card = 0; + HeapWord* r_bot = + _g1h->region_at(_coarse_cur_region_index)->bottom(); + _cur_region_card_offset = _bosa->index_for(r_bot); + } else { + return false; + } + } + // If we didn't return false above, then we can yield a card. + card_index = _cur_region_card_offset + _coarse_cur_region_cur_card; + return true; +} + +void HeapRegionRemSetIterator::fine_find_next_non_null_prt() { + // Otherwise, find the next bucket list in the array. + _fine_array_index++; + while (_fine_array_index < (int) OtherRegionsTable::_max_fine_entries) { + _fine_cur_prt = _fine_grain_regions[_fine_array_index]; + if (_fine_cur_prt != NULL) return; + else _fine_array_index++; + } + assert(_fine_cur_prt == NULL, "Loop post"); +} + +bool HeapRegionRemSetIterator::fine_has_next(size_t& card_index) { + if (fine_has_next()) { + _cur_region_cur_card = + _fine_cur_prt->_bm.get_next_one_offset(_cur_region_cur_card + 1); + } + while (!fine_has_next()) { + if (_cur_region_cur_card == PosParPRT::CardsPerRegion) { + _cur_region_cur_card = 0; + _fine_cur_prt = _fine_cur_prt->next(); + } + if (_fine_cur_prt == NULL) { + fine_find_next_non_null_prt(); + if (_fine_cur_prt == NULL) return false; + } + assert(_fine_cur_prt != NULL && _cur_region_cur_card == 0, + "inv."); + HeapWord* r_bot = + _fine_cur_prt->hr()->bottom(); + _cur_region_card_offset = _bosa->index_for(r_bot); + _cur_region_cur_card = _fine_cur_prt->_bm.get_next_one_offset(0); + } + assert(fine_has_next(), "Or else we exited the loop via the return."); + card_index = _cur_region_card_offset + _cur_region_cur_card; + return true; +} + +bool HeapRegionRemSetIterator::fine_has_next() { + return + _fine_cur_prt != NULL && + _cur_region_cur_card < PosParPRT::CardsPerRegion; +} + +bool HeapRegionRemSetIterator::has_next(size_t& card_index) { + switch (_is) { + case Sparse: + if (_sparse_iter.has_next(card_index)) { + _n_yielded_sparse++; + return true; + } + // Otherwise, deliberate fall-through + _is = Fine; + case Fine: + if (fine_has_next(card_index)) { + _n_yielded_fine++; + return true; + } + // Otherwise, deliberate fall-through + _is = Coarse; + case Coarse: + if (coarse_has_next(card_index)) { + _n_yielded_coarse++; + return true; + } + // Otherwise... 
+ break; + } + assert(ParallelGCThreads > 1 || + n_yielded() == _hrrs->occupied(), + "Should have yielded all the cards in the rem set " + "(in the non-par case)."); + return false; +} + + + +oop** HeapRegionRemSet::_recorded_oops = NULL; +HeapWord** HeapRegionRemSet::_recorded_cards = NULL; +HeapRegion** HeapRegionRemSet::_recorded_regions = NULL; +int HeapRegionRemSet::_n_recorded = 0; + +HeapRegionRemSet::Event* HeapRegionRemSet::_recorded_events = NULL; +int* HeapRegionRemSet::_recorded_event_index = NULL; +int HeapRegionRemSet::_n_recorded_events = 0; + +void HeapRegionRemSet::record(HeapRegion* hr, oop* f) { + if (_recorded_oops == NULL) { + assert(_n_recorded == 0 + && _recorded_cards == NULL + && _recorded_regions == NULL, + "Inv"); + _recorded_oops = NEW_C_HEAP_ARRAY(oop*, MaxRecorded); + _recorded_cards = NEW_C_HEAP_ARRAY(HeapWord*, MaxRecorded); + _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*, MaxRecorded); + } + if (_n_recorded == MaxRecorded) { + gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded); + } else { + _recorded_cards[_n_recorded] = + (HeapWord*)align_size_down(uintptr_t(f), + CardTableModRefBS::card_size); + _recorded_oops[_n_recorded] = f; + _recorded_regions[_n_recorded] = hr; + _n_recorded++; + } +} + +void HeapRegionRemSet::record_event(Event evnt) { + if (!G1RecordHRRSEvents) return; + + if (_recorded_events == NULL) { + assert(_n_recorded_events == 0 + && _recorded_event_index == NULL, + "Inv"); + _recorded_events = NEW_C_HEAP_ARRAY(Event, MaxRecordedEvents); + _recorded_event_index = NEW_C_HEAP_ARRAY(int, MaxRecordedEvents); + } + if (_n_recorded_events == MaxRecordedEvents) { + gclog_or_tty->print_cr("Filled up 'recorded_events' (%d).", MaxRecordedEvents); + } else { + _recorded_events[_n_recorded_events] = evnt; + _recorded_event_index[_n_recorded_events] = _n_recorded; + _n_recorded_events++; + } +} + +void HeapRegionRemSet::print_event(outputStream* str, Event evnt) { + switch (evnt) { + case Event_EvacStart: + str->print("Evac Start"); + break; + case Event_EvacEnd: + str->print("Evac End"); + break; + case Event_RSUpdateEnd: + str->print("RS Update End"); + break; + } +} + +void HeapRegionRemSet::print_recorded() { + int cur_evnt = 0; + Event cur_evnt_kind; + int cur_evnt_ind = 0; + if (_n_recorded_events > 0) { + cur_evnt_kind = _recorded_events[cur_evnt]; + cur_evnt_ind = _recorded_event_index[cur_evnt]; + } + + for (int i = 0; i < _n_recorded; i++) { + while (cur_evnt < _n_recorded_events && i == cur_evnt_ind) { + gclog_or_tty->print("Event: "); + print_event(gclog_or_tty, cur_evnt_kind); + gclog_or_tty->print_cr(""); + cur_evnt++; + if (cur_evnt < MaxRecordedEvents) { + cur_evnt_kind = _recorded_events[cur_evnt]; + cur_evnt_ind = _recorded_event_index[cur_evnt]; + } + } + gclog_or_tty->print("Added card " PTR_FORMAT " to region [" PTR_FORMAT "...]" + " for ref " PTR_FORMAT ".\n", + _recorded_cards[i], _recorded_regions[i]->bottom(), + _recorded_oops[i]); + } +} + +#ifndef PRODUCT +void HeapRegionRemSet::test() { + os::sleep(Thread::current(), (jlong)5000, false); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + // Run with "-XX:G1LogRSRegionEntries=2", so that 1 and 5 end up in same + // hash bucket. 
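The collision mentioned in that comment is plain power-of-two masking: with -XX:G1LogRSRegionEntries=2 the fine-grain table has 1 << 2 = 4 buckets and a region index is reduced by ANDing with 3, so regions 1 and 5 land in the same bucket. A tiny sketch of the computation (the flag value is the test's assumption; the variable names are illustrative):

#include <cstdio>
#include <cstddef>

int main() {
  const size_t log_rs_region_entries = 2;                    // -XX:G1LogRSRegionEntries=2
  const size_t max_fine_entries      = (size_t)1 << log_rs_region_entries;
  const size_t mask                  = max_fine_entries - 1; // like _mod_max_fine_entries_mask
  const size_t region_indices[] = { 1, 5 };
  for (size_t r : region_indices) {
    printf("region %zu -> fine-grain bucket %zu\n", r, r & mask);  // both map to bucket 1
  }
  return 0;
}

The test body that follows picks exactly those regions, plus enough extra referencing regions to force a coarsening.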
+ HeapRegion* hr0 = g1h->region_at(0); + HeapRegion* hr1 = g1h->region_at(1); + HeapRegion* hr2 = g1h->region_at(5); + HeapRegion* hr3 = g1h->region_at(6); + HeapRegion* hr4 = g1h->region_at(7); + HeapRegion* hr5 = g1h->region_at(8); + + HeapWord* hr1_start = hr1->bottom(); + HeapWord* hr1_mid = hr1_start + HeapRegion::GrainWords/2; + HeapWord* hr1_last = hr1->end() - 1; + + HeapWord* hr2_start = hr2->bottom(); + HeapWord* hr2_mid = hr2_start + HeapRegion::GrainWords/2; + HeapWord* hr2_last = hr2->end() - 1; + + HeapWord* hr3_start = hr3->bottom(); + HeapWord* hr3_mid = hr3_start + HeapRegion::GrainWords/2; + HeapWord* hr3_last = hr3->end() - 1; + + HeapRegionRemSet* hrrs = hr0->rem_set(); + + // Make three references from region 0x101... + hrrs->add_reference((oop*)hr1_start); + hrrs->add_reference((oop*)hr1_mid); + hrrs->add_reference((oop*)hr1_last); + + hrrs->add_reference((oop*)hr2_start); + hrrs->add_reference((oop*)hr2_mid); + hrrs->add_reference((oop*)hr2_last); + + hrrs->add_reference((oop*)hr3_start); + hrrs->add_reference((oop*)hr3_mid); + hrrs->add_reference((oop*)hr3_last); + + // Now cause a coarsening. + hrrs->add_reference((oop*)hr4->bottom()); + hrrs->add_reference((oop*)hr5->bottom()); + + // Now, does iteration yield these three? + HeapRegionRemSetIterator iter; + hrrs->init_iterator(&iter); + size_t sum = 0; + size_t card_index; + while (iter.has_next(card_index)) { + HeapWord* card_start = + G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index); + gclog_or_tty->print_cr(" Card " PTR_FORMAT ".", card_start); + sum++; + } + guarantee(sum == 11 - 3 + 2048, "Failure"); + guarantee(sum == hrrs->occupied(), "Failure"); +} +#endif diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,470 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Remembered set for a heap region. Represent a set of "cards" that +// contain pointers into the owner heap region. Cards are defined somewhat +// abstractly, in terms of what the "BlockOffsetTable" in use can parse. 
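Concretely, the classes declared below keep the incoming cards for a region at three granularities: a sparse table holding a handful of explicit card indices per referencing region, a fine-grain bitmap with one bit per card of the referencing region, and, when the fine-grain table overflows, a single coarse bit that stands for the whole referencing region. The following is a much simplified, single-threaded sketch of that escalation for one referencing region (fixed toy sizes, no hashing or locking, none of the real classes):

#include <bitset>
#include <cassert>
#include <cstddef>

// Toy sizes; the real values derive from the region size and the card size.
static const size_t kCardsPerRegion = 64;
static const size_t kSparseCapacity = 4;

// Tracks which cards of ONE referencing region hold pointers into "our" region.
class ToyPerRegionCardSet {
  size_t _sparse[kSparseCapacity];      // level 1: a few explicit card indices
  size_t _n_sparse;
  bool   _fine_in_use;                  // level 2: one bit per card
  std::bitset<kCardsPerRegion> _fine;
  bool   _coarse;                       // level 3: "the whole region may point at us"

public:
  ToyPerRegionCardSet() : _n_sparse(0), _fine_in_use(false), _coarse(false) {}

  void add_card(size_t card) {
    if (_coarse) return;                // already maximally imprecise
    if (!_fine_in_use) {
      for (size_t i = 0; i < _n_sparse; i++) {
        if (_sparse[i] == card) return; // duplicate
      }
      if (_n_sparse < kSparseCapacity) {
        _sparse[_n_sparse++] = card;
        return;
      }
      // Sparse overflow: promote the existing entries into the bitmap.
      for (size_t i = 0; i < _n_sparse; i++) {
        _fine.set(_sparse[i]);
      }
      _fine_in_use = true;
    }
    _fine.set(card);
  }

  // In the real code coarsening happens when the table of fine-grain entries
  // overflows and this entry is evicted; here it is just exposed directly.
  void coarsen() { _coarse = true; _fine.reset(); _fine_in_use = false; }

  bool contains_card(size_t card) const {
    if (_coarse) return true;
    if (_fine_in_use) return _fine.test(card);
    for (size_t i = 0; i < _n_sparse; i++) {
      if (_sparse[i] == card) return true;
    }
    return false;
  }
};

int main() {
  ToyPerRegionCardSet rs;
  for (size_t c = 0; c < 6; c++) rs.add_card(c);   // forces sparse -> fine
  assert(rs.contains_card(5) && !rs.contains_card(42));
  rs.coarsen();
  assert(rs.contains_card(42));                    // coarse answers "maybe" for everything
  return 0;
}

In the real OtherRegionsTable the per-region bitmaps live in an open hash table keyed by the referencing region's index, and coarsening is forced by evicting the most heavily occupied entry when that table fills up.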
+ +class G1CollectedHeap; +class G1BlockOffsetSharedArray; +class HeapRegion; +class HeapRegionRemSetIterator; +class PosParPRT; +class SparsePRT; + + +// The "_coarse_map" is a bitmap with one bit for each region, where set +// bits indicate that the corresponding region may contain some pointer +// into the owning region. + +// The "_fine_grain_entries" array is an open hash table of PerRegionTables +// (PRTs), indicating regions for which we're keeping the RS as a set of +// cards. The strategy is to cap the size of the fine-grain table, +// deleting an entry and setting the corresponding coarse-grained bit when +// we would overflow this cap. + +// We use a mixture of locking and lock-free techniques here. We allow +// threads to locate PRTs without locking, but threads attempting to alter +// a bucket list obtain a lock. This means that any failing attempt to +// find a PRT must be retried with the lock. It might seem dangerous that +// a read can find a PRT that is concurrently deleted. This is all right, +// because: +// +// 1) We only actually free PRT's at safe points (though we reuse them at +// other times). +// 2) We find PRT's in an attempt to add entries. If a PRT is deleted, +// it's _coarse_map bit is set, so the that we were attempting to add +// is represented. If a deleted PRT is re-used, a thread adding a bit, +// thinking the PRT is for a different region, does no harm. + +class OtherRegionsTable: public CHeapObj { + friend class HeapRegionRemSetIterator; + + G1CollectedHeap* _g1h; + Mutex _m; + HeapRegion* _hr; + + // These are protected by "_m". + BitMap _coarse_map; + size_t _n_coarse_entries; + static jint _n_coarsenings; + + PosParPRT** _fine_grain_regions; + size_t _n_fine_entries; + +#define SAMPLE_FOR_EVICTION 1 +#if SAMPLE_FOR_EVICTION + size_t _fine_eviction_start; + static size_t _fine_eviction_stride; + static size_t _fine_eviction_sample_size; +#endif + + SparsePRT _sparse_table; + + // These are static after init. + static size_t _max_fine_entries; + static size_t _mod_max_fine_entries_mask; + + // Requires "prt" to be the first element of the bucket list appropriate + // for "hr". If this list contains an entry for "hr", return it, + // otherwise return "NULL". + PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const; + + // Find, delete, and return a candidate PosParPRT, if any exists, + // adding the deleted region to the coarse bitmap. Requires the caller + // to hold _m, and the fine-grain table to be full. + PosParPRT* delete_region_table(); + + // If a PRT for "hr" is in the bucket list indicated by "ind" (which must + // be the correct index for "hr"), delete it and return true; else return + // false. + bool del_single_region_table(size_t ind, HeapRegion* hr); + + static jint _cache_probes; + static jint _cache_hits; + + // Indexed by thread X heap region, to minimize thread contention. + static int** _from_card_cache; + static size_t _from_card_cache_max_regions; + static size_t _from_card_cache_mem_size; + +public: + OtherRegionsTable(HeapRegion* hr); + + HeapRegion* hr() const { return _hr; } + + // For now. Could "expand" some tables in the future, so that this made + // sense. + void add_reference(oop* from, int tid); + + void add_reference(oop* from) { + return add_reference(from, 0); + } + + // Removes any entries shown by the given bitmaps to contain only dead + // objects. + void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); + + // Not const because it takes a lock. 
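The discipline described in the comment above, where readers may walk the _fine_grain_regions bucket lists without the lock but a thread that fails to find an entry must repeat the search under _m before inserting, reduces to roughly the following sketch in standard C++ (one bucket only, std::mutex and std::atomic standing in for the HotSpot primitives):

#include <atomic>
#include <cassert>
#include <mutex>

struct Entry {
  int                 key;
  std::atomic<Entry*> next;
  explicit Entry(int k) : key(k), next(nullptr) {}
};

static std::atomic<Entry*> bucket_head(nullptr);
static std::mutex          bucket_mutex;

// Lock-free lookup; safe only because entries are never reclaimed while
// readers may still be walking the list (the safepoint argument above).
static Entry* find(int key) {
  for (Entry* e = bucket_head.load(std::memory_order_acquire);
       e != nullptr;
       e = e->next.load(std::memory_order_acquire)) {
    if (e->key == key) return e;
  }
  return nullptr;
}

// A failed lock-free lookup must be repeated under the lock, because another
// thread may have inserted the entry in the meantime.
static Entry* find_or_insert(int key) {
  Entry* e = find(key);
  if (e != nullptr) return e;
  std::lock_guard<std::mutex> guard(bucket_mutex);
  e = find(key);                              // re-check while holding the lock
  if (e != nullptr) return e;
  Entry* n = new Entry(key);
  n->next.store(bucket_head.load(std::memory_order_relaxed),
                std::memory_order_relaxed);
  bucket_head.store(n, std::memory_order_release);
  return n;
}

int main() {
  Entry* a = find_or_insert(7);
  Entry* b = find_or_insert(7);               // second lookup finds the same entry
  assert(a == b);
  return 0;
}

The accessors declared next (occupied(), mem_size() and the rest) take that same lock internally, which is what the preceding comment is pointing out even though they are declared const.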
+ size_t occupied() const; + size_t occ_fine() const; + size_t occ_coarse() const; + size_t occ_sparse() const; + + static jint n_coarsenings() { return _n_coarsenings; } + + // Returns size in bytes. + // Not const because it takes a lock. + size_t mem_size() const; + static size_t static_mem_size(); + static size_t fl_mem_size(); + + bool contains_reference(oop* from) const; + bool contains_reference_locked(oop* from) const; + + void clear(); + + // Specifically clear the from_card_cache. + void clear_fcc(); + + // "from_hr" is being cleared; remove any entries from it. + void clear_incoming_entry(HeapRegion* from_hr); + + // Declare the heap size (in # of regions) to the OtherRegionsTable. + // (Uses it to initialize from_card_cache). + static void init_from_card_cache(size_t max_regions); + + // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. + // Make sure any entries for higher regions are invalid. + static void shrink_from_card_cache(size_t new_n_regs); + + static void print_from_card_cache(); + +}; + + +class HeapRegionRemSet : public CHeapObj { + friend class VMStructs; + friend class HeapRegionRemSetIterator; + +public: + enum Event { + Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd + }; + +private: + G1BlockOffsetSharedArray* _bosa; + G1BlockOffsetSharedArray* bosa() const { return _bosa; } + + static bool _par_traversal; + + OtherRegionsTable _other_regions; + + // One set bit for every region that has an entry for this one. + BitMap _outgoing_region_map; + + // Clear entries for the current region in any rem sets named in + // the _outgoing_region_map. + void clear_outgoing_entries(); + +#if MAYBE + // Audit the given card index. + void audit_card(size_t card_num, HeapRegion* hr, u2* rc_arr, + HeapRegionRemSet* empty_cards, size_t* one_obj_cards); + + // Assumes that "audit_stage1" has been called for "hr", to set up + // "shadow" and "new_rs" appropriately. Identifies individual popular + // objects; returns "true" if any are found. + bool audit_find_pop(HeapRegion* hr, u2* rc_arr); + + // Assumes that "audit_stage1" has been called for "hr", to set up + // "shadow" and "new_rs" appropriately. Identifies individual popular + // objects, and determines the number of entries in "new_rs" if any such + // popular objects are ignored. If this is sufficiently small, returns + // "false" to indicate that a constraint should not be introduced. + // Otherwise, returns "true" to indicate that we should go ahead with + // adding the constraint. + bool audit_stag(HeapRegion* hr, u2* rc_arr); + + + u2* alloc_rc_array(); + + SeqHeapRegionRemSet* audit_post(u2* rc_arr, size_t multi_obj_crds, + SeqHeapRegionRemSet* empty_cards); +#endif + + enum ParIterState { Unclaimed, Claimed, Complete }; + ParIterState _iter_state; + + // Unused unless G1RecordHRRSOops is true. 
+ + static const int MaxRecorded = 1000000; + static oop** _recorded_oops; + static HeapWord** _recorded_cards; + static HeapRegion** _recorded_regions; + static int _n_recorded; + + static const int MaxRecordedEvents = 1000; + static Event* _recorded_events; + static int* _recorded_event_index; + static int _n_recorded_events; + + static void print_event(outputStream* str, Event evnt); + +public: + HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, + HeapRegion* hr); + + static int num_par_rem_sets(); + static bool par_traversal() { return _par_traversal; } + static void set_par_traversal(bool b); + + HeapRegion* hr() const { + return _other_regions.hr(); + } + + size_t occupied() const { + return _other_regions.occupied(); + } + size_t occ_fine() const { + return _other_regions.occ_fine(); + } + size_t occ_coarse() const { + return _other_regions.occ_coarse(); + } + size_t occ_sparse() const { + return _other_regions.occ_sparse(); + } + + static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); } + + /* Used in the sequential case. Returns "true" iff this addition causes + the size limit to be reached. */ + bool add_reference(oop* from) { + _other_regions.add_reference(from); + return false; + } + + /* Used in the parallel case. Returns "true" iff this addition causes + the size limit to be reached. */ + bool add_reference(oop* from, int tid) { + _other_regions.add_reference(from, tid); + return false; + } + + // Records the fact that the current region contains an outgoing + // reference into "to_hr". + void add_outgoing_reference(HeapRegion* to_hr); + + // Removes any entries shown by the given bitmaps to contain only dead + // objects. + void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm); + + // The region is being reclaimed; clear its remset, and any mention of + // entries for this region in other remsets. + void clear(); + + // Forget any entries due to pointers from "from_hr". + void clear_incoming_entry(HeapRegion* from_hr) { + _other_regions.clear_incoming_entry(from_hr); + } + +#if 0 + virtual void cleanup() = 0; +#endif + + // Should be called from single-threaded code. + void init_for_par_iteration(); + // Attempt to claim the region. Returns true iff this call caused an + // atomic transition from Unclaimed to Claimed. + bool claim_iter(); + // Sets the iteration state to "complete". + void set_iter_complete(); + // Returns "true" iff the region's iteration is complete. + bool iter_is_complete(); + + // Initialize the given iterator to iterate over this rem set. + void init_iterator(HeapRegionRemSetIterator* iter) const; + +#if 0 + // Apply the "do_card" method to the start address of every card in the + // rem set. Returns false if some application of the closure aborted. + virtual bool card_iterate(CardClosure* iter) = 0; +#endif + + // The actual # of bytes this hr_remset takes up. + size_t mem_size() { + return _other_regions.mem_size() + // This correction is necessary because the above includes the second + // part. + + sizeof(this) - sizeof(OtherRegionsTable); + } + + // Returns the memory occupancy of all static data structures associated + // with remembered sets. + static size_t static_mem_size() { + return OtherRegionsTable::static_mem_size(); + } + + // Returns the memory occupancy of all free_list data structures associated + // with remembered sets. 
+ static size_t fl_mem_size() { + return OtherRegionsTable::fl_mem_size(); + } + + bool contains_reference(oop* from) const { + return _other_regions.contains_reference(from); + } + void print() const; + +#if MAYBE + // We are about to introduce a constraint, requiring the collection time + // of the region owning this RS to be <= "hr", and forgetting pointers + // from the owning region to "hr." Before doing so, examines this rem + // set for pointers to "hr", possibly identifying some popular objects., + // and possibly finding some cards to no longer contain pointers to "hr", + // + // These steps may prevent the the constraint from being necessary; in + // which case returns a set of cards now thought to contain no pointers + // into HR. In the normal (I assume) case, returns NULL, indicating that + // we should go ahead and add the constraint. + virtual SeqHeapRegionRemSet* audit(HeapRegion* hr) = 0; +#endif + + // Called during a stop-world phase to perform any deferred cleanups. + // The second version may be called by parallel threads after then finish + // collection work. + static void cleanup(); + static void par_cleanup(); + + // Declare the heap size (in # of regions) to the HeapRegionRemSet(s). + // (Uses it to initialize from_card_cache). + static void init_heap(size_t max_regions) { + OtherRegionsTable::init_from_card_cache(max_regions); + } + + // Declares that only regions i s.t. 0 <= i < new_n_regs are in use. + static void shrink_heap(size_t new_n_regs) { + OtherRegionsTable::shrink_from_card_cache(new_n_regs); + } + +#ifndef PRODUCT + static void print_from_card_cache() { + OtherRegionsTable::print_from_card_cache(); + } +#endif + + static void record(HeapRegion* hr, oop* f); + static void print_recorded(); + static void record_event(Event evnt); + + // Run unit tests. +#ifndef PRODUCT + static void test(); +#endif + +}; + +class HeapRegionRemSetIterator : public CHeapObj { + + // The region over which we're iterating. + const HeapRegionRemSet* _hrrs; + + // Local caching of HRRS fields. + const BitMap* _coarse_map; + PosParPRT** _fine_grain_regions; + + G1BlockOffsetSharedArray* _bosa; + G1CollectedHeap* _g1h; + + // The number yielded since initialization. + size_t _n_yielded_fine; + size_t _n_yielded_coarse; + size_t _n_yielded_sparse; + + // If true we're iterating over the coarse table; if false the fine + // table. + enum IterState { + Sparse, + Fine, + Coarse + }; + IterState _is; + + // In both kinds of iteration, heap offset of first card of current + // region. + size_t _cur_region_card_offset; + // Card offset within cur region. + size_t _cur_region_cur_card; + + // Coarse table iteration fields: + + // Current region index; + int _coarse_cur_region_index; + int _coarse_cur_region_cur_card; + + bool coarse_has_next(size_t& card_index); + + // Fine table iteration fields: + + // Index of bucket-list we're working on. + int _fine_array_index; + // Per Region Table we're doing within current bucket list. + PosParPRT* _fine_cur_prt; + + /* SparsePRT::*/ SparsePRTIter _sparse_iter; + + void fine_find_next_non_null_prt(); + + bool fine_has_next(); + bool fine_has_next(size_t& card_index); + +public: + // We require an iterator to be initialized before use, so the + // constructor does little. + HeapRegionRemSetIterator(); + + void initialize(const HeapRegionRemSet* hrrs); + + // If there remains one or more cards to be yielded, returns true and + // sets "card_index" to one of those cards (which is then considered + // yielded.) 
Otherwise, returns false (and leaves "card_index" + // undefined.) + bool has_next(size_t& card_index); + + size_t n_yielded_fine() { return _n_yielded_fine; } + size_t n_yielded_coarse() { return _n_yielded_coarse; } + size_t n_yielded_sparse() { return _n_yielded_sparse; } + size_t n_yielded() { + return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse(); + } +}; + +#if 0 +class CardClosure: public Closure { +public: + virtual void do_card(HeapWord* card_start) = 0; +}; + +#endif diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionSeq.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,344 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_heapRegionSeq.cpp.incl" + +// Local to this file. + +static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) { + if ((*hr1p)->end() <= (*hr2p)->bottom()) return -1; + else if ((*hr2p)->end() <= (*hr1p)->bottom()) return 1; + else if (*hr1p == *hr2p) return 0; + else { + assert(false, "We should never compare distinct overlapping regions."); + } + return 0; +} + +HeapRegionSeq::HeapRegionSeq() : + _alloc_search_start(0), + // The line below is the worst bit of C++ hackery I've ever written + // (Detlefs, 11/23). You should think of it as equivalent to + // "_regions(100, true)": initialize the growable array and inform it + // that it should allocate its elem array(s) on the C heap. The first + // argument, however, is actually a comma expression (new-expr, 100). + // The purpose of the new_expr is to inform the growable array that it + // is *already* allocated on the C heap: it uses the placement syntax to + // keep it from actually doing any allocation. + _regions((ResourceObj::operator new (sizeof(GrowableArray), + (void*)&_regions, + ResourceObj::C_HEAP), + 100), + true), + _next_rr_candidate(0), + _seq_bottom(NULL) +{} + +// Private methods. 
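The constructor above leans on a fairly obscure initializer idiom, so a minimal, standalone sketch may help. The types below (Buf, Holder) are hypothetical stand-ins, not HotSpot classes; in the real constructor the bookkeeping call is ResourceObj::operator new, which records that the GrowableArray's storage is already allocated on the C heap. Only the comma expression in the member initializer is illustrated: its first operand makes the placement-style call on the member's own storage, and its second operand is the argument that the constructor actually receives.

#include <cstddef>
#include <cstdio>

// Hypothetical stand-in types -- not HotSpot classes.
struct Buf {
  size_t _cap;
  // Placement-style operator new: allocates nothing and simply returns the
  // caller-supplied address.  (In HotSpot, ResourceObj::operator new plays
  // this role and additionally records that the storage is on the C heap.)
  static void* operator new(size_t, void* where) { return where; }
  explicit Buf(size_t cap) : _cap(cap) {}
};

struct Holder {
  Buf _buf;
  Holder()
    // The comma expression first makes the bookkeeping "new" call on the
    // member's own storage, then evaluates to 16, which is what the Buf
    // constructor actually receives -- morally just "_buf(16)".
    : _buf((Buf::operator new(sizeof(Buf), (void*)&_buf), 16)) {}
};

int main() {
  Holder h;
  std::printf("capacity = %d\n", (int)h._buf._cap);   // prints 16
  return 0;
}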
+ +HeapWord* +HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) { + assert(G1CollectedHeap::isHumongous(word_size), + "Allocation size should be humongous"); + int cur = ind; + int first = cur; + size_t sumSizes = 0; + while (cur < _regions.length() && sumSizes < word_size) { + // Loop invariant: + // For all i in [first, cur): + // _regions.at(i)->is_empty() + // && _regions.at(i) is contiguous with its predecessor, if any + // && sumSizes is the sum of the sizes of the regions in the interval + // [first, cur) + HeapRegion* curhr = _regions.at(cur); + if (curhr->is_empty() + && !curhr->is_reserved() + && (first == cur + || (_regions.at(cur-1)->end() == + curhr->bottom()))) { + sumSizes += curhr->capacity() / HeapWordSize; + } else { + first = cur + 1; + sumSizes = 0; + } + cur++; + } + if (sumSizes >= word_size) { + _alloc_search_start = cur; + // Mark the allocated regions as allocated. + bool zf = G1CollectedHeap::heap()->allocs_are_zero_filled(); + HeapRegion* first_hr = _regions.at(first); + for (int i = first; i < cur; i++) { + HeapRegion* hr = _regions.at(i); + if (zf) + hr->ensure_zero_filled(); + { + MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); + hr->set_zero_fill_allocated(); + } + size_t sz = hr->capacity() / HeapWordSize; + HeapWord* tmp = hr->allocate(sz); + assert(tmp != NULL, "Humongous allocation failure"); + MemRegion mr = MemRegion(tmp, sz); + SharedHeap::fill_region_with_object(mr); + hr->declare_filled_region_to_BOT(mr); + if (i == first) { + first_hr->set_startsHumongous(); + } else { + assert(i > first, "sanity"); + hr->set_continuesHumongous(first_hr); + } + } + HeapWord* first_hr_bot = first_hr->bottom(); + HeapWord* obj_end = first_hr_bot + word_size; + first_hr->set_top(obj_end); + return first_hr_bot; + } else { + // If we started from the beginning, we want to know why we can't alloc. + return NULL; + } +} + +void HeapRegionSeq::print_empty_runs(bool reserved_are_empty) { + int empty_run = 0; + int n_empty = 0; + bool at_least_one_reserved = false; + int empty_run_start; + for (int i = 0; i < _regions.length(); i++) { + HeapRegion* r = _regions.at(i); + if (r->continuesHumongous()) continue; + if (r->is_empty() && (reserved_are_empty || !r->is_reserved())) { + assert(!r->isHumongous(), "H regions should not be empty."); + if (empty_run == 0) empty_run_start = i; + empty_run++; + n_empty++; + if (r->is_reserved()) { + at_least_one_reserved = true; + } + } else { + if (empty_run > 0) { + gclog_or_tty->print(" %d:%d", empty_run_start, empty_run); + if (reserved_are_empty && at_least_one_reserved) + gclog_or_tty->print("(R)"); + empty_run = 0; + at_least_one_reserved = false; + } + } + } + if (empty_run > 0) { + gclog_or_tty->print(" %d:%d", empty_run_start, empty_run); + if (reserved_are_empty && at_least_one_reserved) gclog_or_tty->print("(R)"); + } + gclog_or_tty->print_cr(" [tot = %d]", n_empty); +} + +int HeapRegionSeq::find(HeapRegion* hr) { + // FIXME: optimized for adjacent regions of fixed size. + int ind = hr->hrs_index(); + if (ind != -1) { + assert(_regions.at(ind) == hr, "Mismatch"); + } + return ind; +} + + +// Public methods. 
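The allocation loop in alloc_obj_from_region_index() above is easier to see in isolation. The sketch below is a simplified, hypothetical version that assumes all slots have the same capacity and are already address-contiguous; the real code additionally checks that each region's bottom() meets its predecessor's end(), skips reserved regions, and then zero-fills and formats the chosen regions. Only the run-finding scan and its invariant are shown.

#include <cstddef>
#include <cstdio>
#include <vector>

// Returns the index of the first slot of a run of free slots whose combined
// capacity is at least 'words', or -1 if no such run exists.
int find_free_run(const std::vector<bool>& is_free,
                  size_t slot_words, size_t words) {
  size_t sum   = 0;
  int    first = 0;
  for (int cur = 0; cur < (int)is_free.size() && sum < words; cur++) {
    // Invariant: slots [first, cur) are all free and 'sum' is their capacity.
    if (is_free[cur]) {
      sum += slot_words;
    } else {
      first = cur + 1;   // run broken: restart just past the used slot
      sum   = 0;
    }
  }
  return (sum >= words) ? first : -1;
}

int main() {
  bool layout[] = { true, false, true, true, true, false };
  std::vector<bool> is_free(layout, layout + 6);
  // Three free slots of 512 words each cover a 1200-word request.
  std::printf("run starts at slot %d\n", find_free_run(is_free, 512, 1200));
  return 0;
}

As in obj_allocate() above, a caller that fails to find a run starting from its cached search index can simply retry the same scan from index 0.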
+ +void HeapRegionSeq::insert(HeapRegion* hr) { + if (_regions.length() == 0 + || _regions.top()->end() <= hr->bottom()) { + hr->set_hrs_index(_regions.length()); + _regions.append(hr); + } else { + _regions.append(hr); + _regions.sort(orderRegions); + for (int i = 0; i < _regions.length(); i++) { + _regions.at(i)->set_hrs_index(i); + } + } + char* bot = (char*)_regions.at(0)->bottom(); + if (_seq_bottom == NULL || bot < _seq_bottom) _seq_bottom = bot; +} + +size_t HeapRegionSeq::length() { + return _regions.length(); +} + +size_t HeapRegionSeq::free_suffix() { + size_t res = 0; + int first = _regions.length() - 1; + int cur = first; + while (cur >= 0 && + (_regions.at(cur)->is_empty() + && !_regions.at(cur)->is_reserved() + && (first == cur + || (_regions.at(cur+1)->bottom() == + _regions.at(cur)->end())))) { + res++; + cur--; + } + return res; +} + +HeapWord* HeapRegionSeq::obj_allocate(size_t word_size) { + int cur = _alloc_search_start; + // Make sure "cur" is a valid index. + assert(cur >= 0, "Invariant."); + HeapWord* res = alloc_obj_from_region_index(cur, word_size); + if (res == NULL) + res = alloc_obj_from_region_index(0, word_size); + return res; +} + +void HeapRegionSeq::iterate(HeapRegionClosure* blk) { + iterate_from((HeapRegion*)NULL, blk); +} + +// The first argument r is the heap region at which iteration begins. +// This operation runs fastest when r is NULL, or the heap region for +// which a HeapRegionClosure most recently returned true, or the +// heap region immediately to its right in the sequence. In all +// other cases a linear search is required to find the index of r. + +void HeapRegionSeq::iterate_from(HeapRegion* r, HeapRegionClosure* blk) { + + // :::: FIXME :::: + // Static cache value is bad, especially when we start doing parallel + // remembered set update. For now just don't cache anything (the + // code in the def'd out blocks). + +#if 0 + static int cached_j = 0; +#endif + int len = _regions.length(); + int j = 0; + // Find the index of r. 
+ if (r != NULL) { +#if 0 + assert(cached_j >= 0, "Invariant."); + if ((cached_j < len) && (r == _regions.at(cached_j))) { + j = cached_j; + } else if ((cached_j + 1 < len) && (r == _regions.at(cached_j + 1))) { + j = cached_j + 1; + } else { + j = find(r); +#endif + if (j < 0) { + j = 0; + } +#if 0 + } +#endif + } + int i; + for (i = j; i < len; i += 1) { + int res = blk->doHeapRegion(_regions.at(i)); + if (res) { +#if 0 + cached_j = i; +#endif + blk->incomplete(); + return; + } + } + for (i = 0; i < j; i += 1) { + int res = blk->doHeapRegion(_regions.at(i)); + if (res) { +#if 0 + cached_j = i; +#endif + blk->incomplete(); + return; + } + } +} + +void HeapRegionSeq::iterate_from(int idx, HeapRegionClosure* blk) { + int len = _regions.length(); + int i; + for (i = idx; i < len; i++) { + if (blk->doHeapRegion(_regions.at(i))) { + blk->incomplete(); + return; + } + } + for (i = 0; i < idx; i++) { + if (blk->doHeapRegion(_regions.at(i))) { + blk->incomplete(); + return; + } + } +} + +MemRegion HeapRegionSeq::shrink_by(size_t shrink_bytes, + size_t& num_regions_deleted) { + assert(shrink_bytes % os::vm_page_size() == 0, "unaligned"); + assert(shrink_bytes % HeapRegion::GrainBytes == 0, "unaligned"); + + if (_regions.length() == 0) { + num_regions_deleted = 0; + return MemRegion(); + } + int j = _regions.length() - 1; + HeapWord* end = _regions.at(j)->end(); + HeapWord* last_start = end; + while (j >= 0 && shrink_bytes > 0) { + HeapRegion* cur = _regions.at(j); + // We have to leave humongous regions where they are, + // and work around them. + if (cur->isHumongous()) { + return MemRegion(last_start, end); + } + cur->reset_zero_fill(); + assert(cur == _regions.top(), "Should be top"); + if (!cur->is_empty()) break; + shrink_bytes -= cur->capacity(); + num_regions_deleted++; + _regions.pop(); + last_start = cur->bottom(); + // We need to delete these somehow, but can't currently do so here: if + // we do, the ZF thread may still access the deleted region. We'll + // leave this here as a reminder that we have to do something about + // this. + // delete cur; + j--; + } + return MemRegion(last_start, end); +} + + +class PrintHeapRegionClosure : public HeapRegionClosure { +public: + bool doHeapRegion(HeapRegion* r) { + gclog_or_tty->print(PTR_FORMAT ":", r); + r->print(); + return false; + } +}; + +void HeapRegionSeq::print() { + PrintHeapRegionClosure cl; + iterate(&cl); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionSeq.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,111 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class HeapRegion; +class HeapRegionClosure; + +class HeapRegionSeq: public CHeapObj { + + // _regions is kept sorted by start address order, and no two regions are + // overlapping. + GrowableArray _regions; + + // The index in "_regions" at which to start the next allocation search. + // (For efficiency only; private to obj_allocate after initialization.) + int _alloc_search_start; + + // Attempts to allocate a block of the (assumed humongous) word_size, + // starting at the region "ind". + HeapWord* alloc_obj_from_region_index(int ind, size_t word_size); + + // Currently, we're choosing collection sets in a round-robin fashion, + // starting here. + int _next_rr_candidate; + + // The bottom address of the bottom-most region, or else NULL if there + // are no regions in the sequence. + char* _seq_bottom; + + public: + // Initializes "this" to the empty sequence of regions. + HeapRegionSeq(); + + // Adds "hr" to "this" sequence. Requires "hr" not to overlap with + // any region already in "this". (Will perform better if regions are + // inserted in ascending address order.) + void insert(HeapRegion* hr); + + // Given a HeapRegion*, returns its index within _regions, + // or returns -1 if not found. + int find(HeapRegion* hr); + + // Requires the index to be valid, and return the region at the index. + HeapRegion* at(size_t i) { return _regions.at((int)i); } + + // Return the number of regions in the sequence. + size_t length(); + + // Returns the number of contiguous regions at the end of the sequence + // that are available for allocation. + size_t free_suffix(); + + // Requires "word_size" to be humongous (in the technical sense). If + // possible, allocates a contiguous subsequence of the heap regions to + // satisfy the allocation, and returns the address of the beginning of + // that sequence, otherwise returns NULL. + HeapWord* obj_allocate(size_t word_size); + + // Apply the "doHeapRegion" method of "blk" to all regions in "this", + // in address order, terminating the iteration early + // if the "doHeapRegion" method returns "true". + void iterate(HeapRegionClosure* blk); + + // Apply the "doHeapRegion" method of "blk" to all regions in "this", + // starting at "r" (or first region, if "r" is NULL), in a circular + // manner, terminating the iteration early if the "doHeapRegion" method + // returns "true". + void iterate_from(HeapRegion* r, HeapRegionClosure* blk); + + // As above, but start from a given index in the sequence + // instead of a given heap region. + void iterate_from(int idx, HeapRegionClosure* blk); + + // Requires "shrink_bytes" to be a multiple of the page size and heap + // region granularity. Deletes as many "rightmost" completely free heap + // regions from the sequence as comprise shrink_bytes bytes. Returns the + // MemRegion indicating the region those regions comprised, and sets + // "num_regions_deleted" to the number of regions deleted. + MemRegion shrink_by(size_t shrink_bytes, size_t& num_regions_deleted); + + // If "addr" falls within a region in the sequence, return that region, + // or else NULL. + HeapRegion* addr_to_region(const void* addr); + + void print(); + + // Prints out runs of empty regions. If the arg is "true" reserved + // (popular regions are considered "empty". 
+ void print_empty_runs(bool reserved_are_empty); + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/heapRegionSeq.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,40 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +inline HeapRegion* HeapRegionSeq::addr_to_region(const void* addr) { + assert(_seq_bottom != NULL, "bad _seq_bottom in addr_to_region"); + if ((char*) addr >= _seq_bottom) { + size_t diff = (size_t) pointer_delta((HeapWord*) addr, + (HeapWord*) _seq_bottom); + int index = (int) (diff >> HeapRegion::LogOfHRGrainWords); + assert(index >= 0, "invariant / paranoia"); + if (index < _regions.length()) { + HeapRegion* hr = _regions.at(index); + assert(hr->is_in_reserved(addr), + "addr_to_region is wrong..."); + return hr; + } + } + return NULL; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/ptrQueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,208 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_ptrQueue.cpp.incl" + +PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm) : + _qset(qset_), _buf(NULL), _index(0), _active(false), + _perm(perm), _lock(NULL) +{} + +PtrQueue::~PtrQueue() { + if (!_perm && _buf != NULL) { + if (_index == _sz) { + // No work to do. + qset()->deallocate_buffer(_buf); + } else { + // We must NULL out the unused entries, then enqueue. + for (size_t i = 0; i < _index; i += oopSize) { + _buf[byte_index_to_index((int)i)] = NULL; + } + qset()->enqueue_complete_buffer(_buf); + _buf = NULL; + } + } +} + + +static int byte_index_to_index(int ind) { + assert((ind % oopSize) == 0, "Invariant."); + return ind / oopSize; +} + +static int index_to_byte_index(int byte_ind) { + return byte_ind * oopSize; +} + +void PtrQueue::enqueue_known_active(void* ptr) { + assert(0 <= _index && _index <= _sz, "Invariant."); + assert(_index == 0 || _buf != NULL, "invariant"); + + while (_index == 0) { + handle_zero_index(); + } + assert(_index > 0, "postcondition"); + + _index -= oopSize; + _buf[byte_index_to_index((int)_index)] = ptr; + assert(0 <= _index && _index <= _sz, "Invariant."); +} + +void PtrQueue::locking_enqueue_completed_buffer(void** buf) { + assert(_lock->owned_by_self(), "Required."); + _lock->unlock(); + qset()->enqueue_complete_buffer(buf); + // We must relock only because the caller will unlock, for the normal + // case. + _lock->lock_without_safepoint_check(); +} + + +PtrQueueSet::PtrQueueSet(bool notify_when_complete) : + _max_completed_queue(0), + _cbl_mon(NULL), _fl_lock(NULL), + _notify_when_complete(notify_when_complete), + _sz(0), + _completed_buffers_head(NULL), + _completed_buffers_tail(NULL), + _n_completed_buffers(0), + _process_completed_threshold(0), _process_completed(false), + _buf_free_list(NULL), _buf_free_list_sz(0) +{} + +void** PtrQueueSet::allocate_buffer() { + assert(_sz > 0, "Didn't set a buffer size."); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + if (_buf_free_list != NULL) { + void** res = _buf_free_list; + _buf_free_list = (void**)_buf_free_list[0]; + _buf_free_list_sz--; + // Just override the next pointer with NULL, just in case we scan this part + // of the buffer. + res[0] = NULL; + return res; + } else { + return NEW_C_HEAP_ARRAY(void*, _sz); + } +} + +void PtrQueueSet::deallocate_buffer(void** buf) { + assert(_sz > 0, "Didn't set a buffer size."); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + buf[0] = (void*)_buf_free_list; + _buf_free_list = buf; + _buf_free_list_sz++; +} + +void PtrQueueSet::reduce_free_list() { + // For now we'll adopt the strategy of deleting half. + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + size_t n = _buf_free_list_sz / 2; + while (n > 0) { + assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong."); + void** head = _buf_free_list; + _buf_free_list = (void**)_buf_free_list[0]; + FREE_C_HEAP_ARRAY(void*,head); + n--; + } +} + +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) { + // I use explicit locking here because there's a bailout in the middle. 
+ _cbl_mon->lock_without_safepoint_check(); + + Thread* thread = Thread::current(); + assert( ignore_max_completed || + thread->is_Java_thread() || + SafepointSynchronize::is_at_safepoint(), + "invariant" ); + ignore_max_completed = ignore_max_completed || !thread->is_Java_thread(); + + if (!ignore_max_completed && _max_completed_queue > 0 && + _n_completed_buffers >= (size_t) _max_completed_queue) { + _cbl_mon->unlock(); + bool b = mut_process_buffer(buf); + if (b) { + deallocate_buffer(buf); + return; + } + + // Otherwise, go ahead and enqueue the buffer. Must reaquire the lock. + _cbl_mon->lock_without_safepoint_check(); + } + + // Here we still hold the _cbl_mon. + CompletedBufferNode* cbn = new CompletedBufferNode; + cbn->buf = buf; + cbn->next = NULL; + cbn->index = index; + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = cbn; + _completed_buffers_tail = cbn; + } else { + _completed_buffers_tail->next = cbn; + _completed_buffers_tail = cbn; + } + _n_completed_buffers++; + + if (!_process_completed && + _n_completed_buffers == _process_completed_threshold) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify_all(); + } + debug_only(assert_completed_buffer_list_len_correct_locked()); + _cbl_mon->unlock(); +} + +int PtrQueueSet::completed_buffers_list_length() { + int n = 0; + CompletedBufferNode* cbn = _completed_buffers_head; + while (cbn != NULL) { + n++; + cbn = cbn->next; + } + return n; +} + +void PtrQueueSet::assert_completed_buffer_list_len_correct() { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + assert_completed_buffer_list_len_correct_locked(); +} + +void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() { + guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers, + "Completed buffer length is wrong."); +} + +void PtrQueueSet::set_buffer_size(size_t sz) { + assert(_sz == 0 && sz > 0, "Should be called only once."); + _sz = sz * oopSize; +} + +void PtrQueueSet::set_process_completed_threshold(size_t sz) { + _process_completed_threshold = sz; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/ptrQueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,229 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// There are various techniques that require threads to be able to log +// addresses. 
For example, a generational write barrier might log +// the addresses of modified old-generation objects. This type supports +// this operation. + +class PtrQueueSet; + +class PtrQueue: public CHeapObj { + +protected: + // The ptr queue set to which this queue belongs. + PtrQueueSet* _qset; + + // Whether updates should be logged. + bool _active; + + // The buffer. + void** _buf; + // The index at which an object was last enqueued. Starts at "_sz" + // (indicating an empty buffer) and goes towards zero. + size_t _index; + + // The size of the buffer. + size_t _sz; + + // If true, the queue is permanent, and doesn't need to deallocate + // its buffer in the destructor (since that obtains a lock which may not + // be legally locked by then. + bool _perm; + + // If there is a lock associated with this buffer, this is that lock. + Mutex* _lock; + + PtrQueueSet* qset() { return _qset; } + +public: + // Initialize this queue to contain a null buffer, and be part of the + // given PtrQueueSet. + PtrQueue(PtrQueueSet*, bool perm = false); + // Release any contained resources. + ~PtrQueue(); + + // Associate a lock with a ptr queue. + void set_lock(Mutex* lock) { _lock = lock; } + + void reset() { if (_buf != NULL) _index = _sz; } + + // Enqueues the given "obj". + void enqueue(void* ptr) { + if (!_active) return; + else enqueue_known_active(ptr); + } + + inline void handle_zero_index(); + void locking_enqueue_completed_buffer(void** buf); + + void enqueue_known_active(void* ptr); + + size_t size() { + assert(_sz >= _index, "Invariant."); + return _buf == NULL ? 0 : _sz - _index; + } + + // Set the "active" property of the queue to "b". An enqueue to an + // inactive thread is a no-op. Setting a queue to inactive resets its + // log to the empty state. + void set_active(bool b) { + _active = b; + if (!b && _buf != NULL) { + _index = _sz; + } else if (b && _buf != NULL) { + assert(_index == _sz, "invariant: queues are empty when activated."); + } + } + + static int byte_index_to_index(int ind) { + assert((ind % oopSize) == 0, "Invariant."); + return ind / oopSize; + } + + static int index_to_byte_index(int byte_ind) { + return byte_ind * oopSize; + } + + // To support compiler. + static ByteSize byte_offset_of_index() { + return byte_offset_of(PtrQueue, _index); + } + static ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); } + + static ByteSize byte_offset_of_buf() { + return byte_offset_of(PtrQueue, _buf); + } + static ByteSize byte_width_of_buf() { return in_ByteSize(sizeof(void*)); } + + static ByteSize byte_offset_of_active() { + return byte_offset_of(PtrQueue, _active); + } + static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); } + +}; + +// A PtrQueueSet represents resources common to a set of pointer queues. +// In particular, the individual queues allocate buffers from this shared +// set, and return completed buffers to the set. +// All these variables are are protected by the TLOQ_CBL_mon. XXX ??? +class PtrQueueSet: public CHeapObj { + +protected: + + class CompletedBufferNode: public CHeapObj { + public: + void** buf; + size_t index; + CompletedBufferNode* next; + CompletedBufferNode() : buf(NULL), + index(0), next(NULL){ } + }; + + Monitor* _cbl_mon; // Protects the fields below. 
+ CompletedBufferNode* _completed_buffers_head; + CompletedBufferNode* _completed_buffers_tail; + size_t _n_completed_buffers; + size_t _process_completed_threshold; + volatile bool _process_completed; + + // This (and the interpretation of the first element as a "next" + // pointer) are protected by the TLOQ_FL_lock. + Mutex* _fl_lock; + void** _buf_free_list; + size_t _buf_free_list_sz; + + // The size of all buffers in the set. + size_t _sz; + + bool _all_active; + + // If true, notify_all on _cbl_mon when the threshold is reached. + bool _notify_when_complete; + + // Maximum number of elements allowed on completed queue: after that, + // enqueuer does the work itself. Zero indicates no maximum. + int _max_completed_queue; + + int completed_buffers_list_length(); + void assert_completed_buffer_list_len_correct_locked(); + void assert_completed_buffer_list_len_correct(); + +protected: + // A mutator thread does the the work of processing a buffer. + // Returns "true" iff the work is complete (and the buffer may be + // deallocated). + virtual bool mut_process_buffer(void** buf) { + ShouldNotReachHere(); + return false; + } + +public: + // Create an empty ptr queue set. + PtrQueueSet(bool notify_when_complete = false); + + // Because of init-order concerns, we can't pass these as constructor + // arguments. + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0) { + _max_completed_queue = max_completed_queue; + assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); + _cbl_mon = cbl_mon; _fl_lock = fl_lock; + } + + // Return an empty oop array of size _sz (required to be non-zero). + void** allocate_buffer(); + + // Return an empty buffer to the free list. The "buf" argument is + // required to be a pointer to the head of an array of length "_sz". + void deallocate_buffer(void** buf); + + // Declares that "buf" is a complete buffer. + void enqueue_complete_buffer(void** buf, size_t index = 0, + bool ignore_max_completed = false); + + bool completed_buffers_exist_dirty() { + return _n_completed_buffers > 0; + } + + bool process_completed_buffers() { return _process_completed; } + + bool active() { return _all_active; } + + // Set the buffer size. Should be called before any "enqueue" operation + // can be called. And should only be called once. + void set_buffer_size(size_t sz); + + // Get the buffer size. + size_t buffer_size() { return _sz; } + + // Set the number of completed buffers that triggers log processing. + void set_process_completed_threshold(size_t sz); + + // Must only be called at a safe point. Indicates that the buffer free + // list size may be reduced, if that is deemed desirable. + void reduce_free_list(); + + size_t completed_buffers_num() { return _n_completed_buffers; } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,41 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +void PtrQueue::handle_zero_index() { + assert(0 == _index, "Precondition."); + // This thread records the full buffer and allocates a new one (while + // holding the lock if there is one). + void** buf = _buf; + _buf = qset()->allocate_buffer(); + _sz = qset()->buffer_size(); + _index = _sz; + assert(0 <= _index && _index <= _sz, "Invariant."); + if (buf != NULL) { + if (_lock) { + locking_enqueue_completed_buffer(buf); + } else { + qset()->enqueue_complete_buffer(buf); + } + } +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/satbQueue.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,160 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_satbQueue.cpp.incl" + +void ObjPtrQueue::apply_closure(ObjectClosure* cl) { + if (_buf != NULL) { + apply_closure_to_buffer(cl, _buf, _index, _sz); + _index = _sz; + } +} + +void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl, + void** buf, size_t index, size_t sz) { + if (cl == NULL) return; + for (size_t i = index; i < sz; i += oopSize) { + oop obj = (oop)buf[byte_index_to_index((int)i)]; + // There can be NULL entries because of destructors. 
+ if (obj != NULL) { + cl->do_object(obj); + } + } +} +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list +#endif // _MSC_VER + + +SATBMarkQueueSet::SATBMarkQueueSet() : + PtrQueueSet(), + _closure(NULL), _par_closures(NULL), + _shared_satb_queue(this, true /*perm*/) +{} + +void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue, + Mutex* lock) { + PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + _shared_satb_queue.set_lock(lock); + if (ParallelGCThreads > 0) { + _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads); + } +} + + +void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) { + t->satb_mark_queue().handle_zero_index(); +} + +void SATBMarkQueueSet::set_active_all_threads(bool b) { + _all_active = b; + for(JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().set_active(b); + } +} + +void SATBMarkQueueSet::set_closure(ObjectClosure* closure) { + _closure = closure; +} + +void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) { + assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition"); + _par_closures[i] = par_closure; +} + +void SATBMarkQueueSet::iterate_closure_all_threads() { + for(JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().apply_closure(_closure); + } + shared_satb_queue()->apply_closure(_closure); +} + +void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) { + SharedHeap* sh = SharedHeap::heap(); + int parity = sh->strong_roots_parity(); + + for(JavaThread* t = Threads::first(); t; t = t->next()) { + if (t->claim_oops_do(true, parity)) { + t->satb_mark_queue().apply_closure(_par_closures[worker]); + } + } + // We'll have worker 0 do this one. + if (worker == 0) { + shared_satb_queue()->apply_closure(_par_closures[0]); + } +} + +bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, + int worker) { + CompletedBufferNode* nd = NULL; + { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_head != NULL) { + nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; + _n_completed_buffers--; + if (_n_completed_buffers == 0) _process_completed = false; + } + } + ObjectClosure* cl = (par ? _par_closures[worker] : _closure); + if (nd != NULL) { + ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz); + deallocate_buffer(nd->buf); + delete nd; + return true; + } else { + return false; + } +} + +void SATBMarkQueueSet::abandon_partial_marking() { + CompletedBufferNode* buffers_to_delete = NULL; + { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + while (_completed_buffers_head != NULL) { + CompletedBufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next; + nd->next = buffers_to_delete; + buffers_to_delete = nd; + } + _completed_buffers_tail = NULL; + _n_completed_buffers = 0; + debug_only(assert_completed_buffer_list_len_correct_locked()); + } + while (buffers_to_delete != NULL) { + CompletedBufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next; + deallocate_buffer(nd->buf); + delete nd; + } + assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); + // So we can safely manipulate these queues. 
+ for (JavaThread* t = Threads::first(); t; t = t->next()) { + t->satb_mark_queue().reset(); + } + shared_satb_queue()->reset(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/satbQueue.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,105 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class ObjectClosure; +class JavaThread; + +// A ptrQueue whose elements are "oops", pointers to object heads. +class ObjPtrQueue: public PtrQueue { +public: + ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) : + PtrQueue(qset_, perm) + {} + // Apply the closure to all elements, and reset the index to make the + // buffer empty. + void apply_closure(ObjectClosure* cl); + + // Apply the closure to all elements of "buf", down to "index" (inclusive.) + static void apply_closure_to_buffer(ObjectClosure* cl, + void** buf, size_t index, size_t sz); + +}; + + + +class SATBMarkQueueSet: public PtrQueueSet { + ObjectClosure* _closure; + ObjectClosure** _par_closures; // One per ParGCThread. + + ObjPtrQueue _shared_satb_queue; + + // Utility function to support sequential and parallel versions. If + // "par" is true, then "worker" is the par thread id; if "false", worker + // is ignored. + bool apply_closure_to_completed_buffer_work(bool par, int worker); + + +public: + SATBMarkQueueSet(); + + void initialize(Monitor* cbl_mon, Mutex* fl_lock, + int max_completed_queue = 0, + Mutex* lock = NULL); + + static void handle_zero_index_for_thread(JavaThread* t); + + // Apply "set_active(b)" to all thread tloq's. Should be called only + // with the world stopped. + void set_active_all_threads(bool b); + + // Register "blk" as "the closure" for all queues. Only one such closure + // is allowed. The "apply_closure_to_completed_buffer" method will apply + // this closure to a completed buffer, and "iterate_closure_all_threads" + // applies it to partially-filled buffers (the latter should only be done + // with the world stopped). + void set_closure(ObjectClosure* closure); + // Set the parallel closures: pointer is an array of pointers to + // closures, one for each parallel GC thread. + void set_par_closure(int i, ObjectClosure* closure); + + // If there is a registered closure for buffers, apply it to all entries + // in all currently-active buffers. This should only be applied at a + // safepoint. (Currently must not be called in parallel; this should + // change in the future.) 
+ void iterate_closure_all_threads(); + // Parallel version of the above. + void par_iterate_closure_all_threads(int worker); + + // If there exists some completed buffer, pop it, then apply the + // registered closure to all its elements, and return true. If no + // completed buffers exist, return false. + bool apply_closure_to_completed_buffer() { + return apply_closure_to_completed_buffer_work(false, 0); + } + // Parallel version of the above. + bool par_apply_closure_to_completed_buffer(int worker) { + return apply_closure_to_completed_buffer_work(true, worker); + } + + ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; } + + // If a marking is being abandoned, reset any unprocessed log buffers. + void abandon_partial_marking(); + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/sparsePRT.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,530 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_sparsePRT.cpp.incl" + +#define SPARSE_PRT_VERBOSE 0 + +#define UNROLL_CARD_LOOPS 1 + +void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) { + sprt_iter->init(this); +} + +void SparsePRTEntry::init(short region_ind) { + _region_ind = region_ind; + _next_index = NullEntry; +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + _cards[0] = NullEntry; + _cards[1] = NullEntry; + _cards[2] = NullEntry; + _cards[3] = NullEntry; +#else + for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry; +#endif +} + +bool SparsePRTEntry::contains_card(short card_index) const { +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + if (_cards[0] == card_index) return true; + if (_cards[1] == card_index) return true; + if (_cards[2] == card_index) return true; + if (_cards[3] == card_index) return true; +#else + for (int i = 0; i < CardsPerEntry; i++) { + if (_cards[i] == card_index) return true; + } +#endif + // Otherwise, we're full. + return false; +} + +int SparsePRTEntry::num_valid_cards() const { + int sum = 0; +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. 
If changes, un-unroll."); + if (_cards[0] != NullEntry) sum++; + if (_cards[1] != NullEntry) sum++; + if (_cards[2] != NullEntry) sum++; + if (_cards[3] != NullEntry) sum++; +#else + for (int i = 0; i < CardsPerEntry; i++) { + if (_cards[i] != NulLEntry) sum++; + } +#endif + // Otherwise, we're full. + return sum; +} + +SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) { +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + short c = _cards[0]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[0] = card_index; return added; } + c = _cards[1]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[1] = card_index; return added; } + c = _cards[2]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[2] = card_index; return added; } + c = _cards[3]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[3] = card_index; return added; } +#else + for (int i = 0; i < CardsPerEntry; i++) { + short c = _cards[i]; + if (c == card_index) return found; + if (c == NullEntry) { _cards[i] = card_index; return added; } + } +#endif + // Otherwise, we're full. + return overflow; +} + +void SparsePRTEntry::copy_cards(short* cards) const { +#if UNROLL_CARD_LOOPS + assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); + cards[0] = _cards[0]; + cards[1] = _cards[1]; + cards[2] = _cards[2]; + cards[3] = _cards[3]; +#else + for (int i = 0; i < CardsPerEntry; i++) { + cards[i] = _cards[i]; + } +#endif +} + +void SparsePRTEntry::copy_cards(SparsePRTEntry* e) const { + copy_cards(&e->_cards[0]); +} + +// ---------------------------------------------------------------------- + +RSHashTable::RSHashTable(size_t capacity) : + _capacity(capacity), _capacity_mask(capacity-1), + _occupied_entries(0), _occupied_cards(0), + _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)), + _buckets(NEW_C_HEAP_ARRAY(short, capacity)), + _next_deleted(NULL), _deleted(false), + _free_list(NullEntry), _free_region(0) +{ + clear(); +} + +RSHashTable::~RSHashTable() { + if (_entries != NULL) { + FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries); + _entries = NULL; + } + if (_buckets != NULL) { + FREE_C_HEAP_ARRAY(short, _buckets); + _buckets = NULL; + } +} + +void RSHashTable::clear() { + _occupied_entries = 0; + _occupied_cards = 0; + guarantee(_entries != NULL, "INV"); + guarantee(_buckets != NULL, "INV"); + // This will put -1 == NullEntry in the key field of all entries. 
+ memset(_entries, -1, _capacity * sizeof(SparsePRTEntry)); + memset(_buckets, -1, _capacity * sizeof(short)); + _free_list = NullEntry; + _free_region = 0; +} + +bool RSHashTable::add_card(short region_ind, short card_index) { + SparsePRTEntry* e = entry_for_region_ind_create(region_ind); + assert(e != NULL && e->r_ind() == region_ind, + "Postcondition of call above."); + SparsePRTEntry::AddCardResult res = e->add_card(card_index); + if (res == SparsePRTEntry::added) _occupied_cards++; +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" after add_card[%d]: valid-cards = %d.", + pointer_delta(e, _entries, sizeof(SparsePRTEntry)), + e->num_valid_cards()); +#endif + assert(e->num_valid_cards() > 0, "Postcondition"); + return res != SparsePRTEntry::overflow; +} + +bool RSHashTable::get_cards(short region_ind, short* cards) { + short ind = (short) (region_ind & capacity_mask()); + short cur_ind = _buckets[ind]; + SparsePRTEntry* cur; + while (cur_ind != NullEntry && + (cur = entry(cur_ind))->r_ind() != region_ind) { + cur_ind = cur->next_index(); + } + + if (cur_ind == NullEntry) return false; + // Otherwise... + assert(cur->r_ind() == region_ind, "Postcondition of loop + test above."); + assert(cur->num_valid_cards() > 0, "Inv"); + cur->copy_cards(cards); + return true; +} + +bool RSHashTable::delete_entry(short region_ind) { + short ind = (short) (region_ind & capacity_mask()); + short* prev_loc = &_buckets[ind]; + short cur_ind = *prev_loc; + SparsePRTEntry* cur; + while (cur_ind != NullEntry && + (cur = entry(cur_ind))->r_ind() != region_ind) { + prev_loc = cur->next_index_addr(); + cur_ind = *prev_loc; + } + + if (cur_ind == NullEntry) return false; + // Otherwise, splice out "cur". + *prev_loc = cur->next_index(); + _occupied_cards -= cur->num_valid_cards(); + free_entry(cur_ind); + _occupied_entries--; + return true; +} + +SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const { + assert(occupied_entries() < capacity(), "Precondition"); + short ind = (short) (region_ind & capacity_mask()); + short cur_ind = _buckets[ind]; + SparsePRTEntry* cur; + // XXX + // int k = 0; + while (cur_ind != NullEntry && + (cur = entry(cur_ind))->r_ind() != region_ind) { + /* + k++; + if (k > 10) { + gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): " + "k = %d, cur_ind = %d.", region_ind, k, cur_ind); + if (k >= 1000) { + while (1) ; + } + } + */ + cur_ind = cur->next_index(); + } + + if (cur_ind != NullEntry) { + assert(cur->r_ind() == region_ind, "Loop postcondition + test"); + return cur; + } else { + return NULL; + } +} + +SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) { + SparsePRTEntry* res = entry_for_region_ind(region_ind); + if (res == NULL) { + short new_ind = alloc_entry(); + assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room."); + res = entry(new_ind); + res->init(region_ind); + // Insert at front. 
+ short ind = (short) (region_ind & capacity_mask()); + res->set_next_index(_buckets[ind]); + _buckets[ind] = new_ind; + _occupied_entries++; + } + return res; +} + +short RSHashTable::alloc_entry() { + short res; + if (_free_list != NullEntry) { + res = _free_list; + _free_list = entry(res)->next_index(); + return res; + } else if ((size_t) _free_region+1 < capacity()) { + res = _free_region; + _free_region++; + return res; + } else { + return NullEntry; + } +} + + +void RSHashTable::free_entry(short fi) { + entry(fi)->set_next_index(_free_list); + _free_list = fi; +} + + +void RSHashTable::add_entry(SparsePRTEntry* e) { + assert(e->num_valid_cards() > 0, "Precondition."); + SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind()); + e->copy_cards(e2); + _occupied_cards += e2->num_valid_cards(); + assert(e2->num_valid_cards() > 0, "Postcondition."); +} + +RSHashTable* RSHashTable::_head_deleted_list = NULL; + +void RSHashTable::add_to_deleted_list(RSHashTable* rsht) { + assert(!rsht->deleted(), "Should delete only once."); + rsht->set_deleted(true); + RSHashTable* hd = _head_deleted_list; + while (true) { + rsht->_next_deleted = hd; + RSHashTable* res = + (RSHashTable*) + Atomic::cmpxchg_ptr(rsht, &_head_deleted_list, hd); + if (res == hd) return; + else hd = res; + } +} + +RSHashTable* RSHashTable::get_from_deleted_list() { + RSHashTable* hd = _head_deleted_list; + while (hd != NULL) { + RSHashTable* next = hd->next_deleted(); + RSHashTable* res = + (RSHashTable*) + Atomic::cmpxchg_ptr(next, &_head_deleted_list, hd); + if (res == hd) { + hd->set_next_deleted(NULL); + hd->set_deleted(false); + return hd; + } else { + hd = res; + } + } + return NULL; +} + +short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() { + short res; + while (_bl_ind != RSHashTable::NullEntry) { + res = _rsht->entry(_bl_ind)->card(0); + if (res != SparsePRTEntry::NullEntry) { + return res; + } else { + _bl_ind = _rsht->entry(_bl_ind)->next_index(); + } + } + // Otherwise, none found: + return SparsePRTEntry::NullEntry; +} + +size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) { + return + _heap_bot_card_ind + + (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion) + + ci; +} + +bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) { + _card_ind++; + short ci; + if (_card_ind < SparsePRTEntry::CardsPerEntry && + ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) != + SparsePRTEntry::NullEntry)) { + card_index = compute_card_ind(ci); + return true; + } + // Otherwise, must find the next valid entry. + _card_ind = 0; + + if (_bl_ind != RSHashTable::NullEntry) { + _bl_ind = _rsht->entry(_bl_ind)->next_index(); + ci = find_first_card_in_list(); + if (ci != SparsePRTEntry::NullEntry) { + card_index = compute_card_ind(ci); + return true; + } + } + // If we didn't return above, must go to the next non-null table index. + _tbl_ind++; + while ((size_t)_tbl_ind < _rsht->capacity()) { + _bl_ind = _rsht->_buckets[_tbl_ind]; + ci = find_first_card_in_list(); + if (ci != SparsePRTEntry::NullEntry) { + card_index = compute_card_ind(ci); + return true; + } + // Otherwise, try next entry. + _tbl_ind++; + } + // Otherwise, there were no entry. 
+ return false; +} + +bool RSHashTable::contains_card(short region_index, short card_index) const { + SparsePRTEntry* e = entry_for_region_ind(region_index); + return (e != NULL && e->contains_card(card_index)); +} + +size_t RSHashTable::mem_size() const { + return sizeof(this) + capacity() * (sizeof(SparsePRTEntry) + sizeof(short)); +} + + +// ---------------------------------------------------------------------- + +SparsePRT* SparsePRT::_head_expanded_list = NULL; + +void SparsePRT::add_to_expanded_list(SparsePRT* sprt) { + // We could expand multiple times in a pause -- only put on list once. + if (sprt->expanded()) return; + sprt->set_expanded(true); + SparsePRT* hd = _head_expanded_list; + while (true) { + sprt->_next_expanded = hd; + SparsePRT* res = + (SparsePRT*) + Atomic::cmpxchg_ptr(sprt, &_head_expanded_list, hd); + if (res == hd) return; + else hd = res; + } +} + +SparsePRT* SparsePRT::get_from_expanded_list() { + SparsePRT* hd = _head_expanded_list; + while (hd != NULL) { + SparsePRT* next = hd->next_expanded(); + SparsePRT* res = + (SparsePRT*) + Atomic::cmpxchg_ptr(next, &_head_expanded_list, hd); + if (res == hd) { + hd->set_next_expanded(NULL); + return hd; + } else { + hd = res; + } + } + return NULL; +} + + +void SparsePRT::cleanup_all() { + // First clean up all expanded tables so they agree on next and cur. + SparsePRT* sprt = get_from_expanded_list(); + while (sprt != NULL) { + sprt->cleanup(); + sprt = get_from_expanded_list(); + } + // Now delete all deleted RSHashTables. + RSHashTable* rsht = RSHashTable::get_from_deleted_list(); + while (rsht != NULL) { +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr("About to delete RSHT " PTR_FORMAT ".", rsht); +#endif + delete rsht; + rsht = RSHashTable::get_from_deleted_list(); + } +} + + +SparsePRT::SparsePRT(HeapRegion* hr) : + _expanded(false), _next_expanded(NULL) +{ + _cur = new RSHashTable(InitialCapacity); + _next = _cur; +} + +SparsePRT::~SparsePRT() { + assert(_next != NULL && _cur != NULL, "Inv"); + if (_cur != _next) { delete _cur; } + delete _next; +} + + +size_t SparsePRT::mem_size() const { + // We ignore "_cur" here, because it either = _next, or else it is + // on the deleted list. + return sizeof(this) + _next->mem_size(); +} + +bool SparsePRT::add_card(short region_id, short card_index) { +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" Adding card %d from region %d to region %d sparse.", + card_index, region_id, _hr->hrs_index()); +#endif + if (_next->occupied_entries() * 2 > _next->capacity()) { + expand(); + } + return _next->add_card(region_id, card_index); +} + +bool SparsePRT::get_cards(short region_id, short* cards) { + return _next->get_cards(region_id, cards); +} + +bool SparsePRT::delete_entry(short region_id) { + return _next->delete_entry(region_id); +} + +void SparsePRT::clear() { + // If they differ, _next is bigger then cur, so next has no chance of + // being the initial size. + if (_next != _cur) { + delete _next; + } + + if (_cur->capacity() != InitialCapacity) { + delete _cur; + _cur = new RSHashTable(InitialCapacity); + } else { + _cur->clear(); + } + _next = _cur; +} + +void SparsePRT::cleanup() { + // Make sure that the current and next tables agree. (Another mechanism + // takes care of deleting now-unused tables.) 
+ _cur = _next; +} + +void SparsePRT::expand() { + RSHashTable* last = _next; + _next = new RSHashTable(last->capacity() * 2); + +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" Expanded sparse table for %d to %d.", + _hr->hrs_index(), _next->capacity()); +#endif + for (size_t i = 0; i < last->capacity(); i++) { + SparsePRTEntry* e = last->entry((int)i); + if (e->valid_entry()) { +#if SPARSE_PRT_VERBOSE + gclog_or_tty->print_cr(" During expansion, transferred entry for %d.", + e->r_ind()); +#endif + _next->add_entry(e); + } + } + if (last != _cur) + RSHashTable::add_to_deleted_list(last); + add_to_expanded_list(this); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/sparsePRT.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,308 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Sparse remembered set for a heap region (the "owning" region). Maps +// indices of other regions to short sequences of cards in the other region +// that might contain pointers into the owner region. + +// These tables only expand while they are accessed in parallel -- +// deletions may be done in single-threaded code. This allows us to allow +// unsynchronized reads/iterations, as long as expansions caused by +// insertions only enqueue old versions for deletions, but do not delete +// old versions synchronously. + + +class SparsePRTEntry { +public: + enum SomePublicConstants { + CardsPerEntry = (short)4, + NullEntry = (short)-1, + DeletedEntry = (short)-2 + }; + +private: + short _region_ind; + short _next_index; + short _cards[CardsPerEntry]; + +public: + + // Set the region_ind to the given value, and delete all cards. + inline void init(short region_ind); + + short r_ind() const { return _region_ind; } + bool valid_entry() const { return r_ind() >= 0; } + void set_r_ind(short rind) { _region_ind = rind; } + + short next_index() const { return _next_index; } + short* next_index_addr() { return &_next_index; } + void set_next_index(short ni) { _next_index = ni; } + + // Returns "true" iff the entry contains the given card index. + inline bool contains_card(short card_index) const; + + // Returns the number of non-NULL card entries. + inline int num_valid_cards() const; + + // Requires that the entry not contain the given card index. 
If there is
+  // space available, add the given card index to the entry and return
+  // "added"; otherwise, return "overflow" to indicate that the entry is full.
+  enum AddCardResult {
+    overflow,
+    found,
+    added
+  };
+  inline AddCardResult add_card(short card_index);
+
+  // Copy the current entry's cards into "cards".
+  inline void copy_cards(short* cards) const;
+  // Copy the current entry's cards into the "_cards" array of "e."
+  inline void copy_cards(SparsePRTEntry* e) const;
+
+  inline short card(int i) const { return _cards[i]; }
+};
+
+
+class RSHashTable : public CHeapObj {
+
+  friend class RSHashTableIter;
+
+  enum SomePrivateConstants {
+    NullEntry = -1
+  };
+
+  size_t _capacity;
+  size_t _capacity_mask;
+  size_t _occupied_entries;
+  size_t _occupied_cards;
+
+  SparsePRTEntry* _entries;
+  short* _buckets;
+  short  _free_region;
+  short  _free_list;
+
+  static RSHashTable* _head_deleted_list;
+  RSHashTable* _next_deleted;
+  RSHashTable* next_deleted() { return _next_deleted; }
+  void set_next_deleted(RSHashTable* rsht) { _next_deleted = rsht; }
+  bool _deleted;
+  void set_deleted(bool b) { _deleted = b; }
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise returns "NULL."
+  SparsePRTEntry* entry_for_region_ind(short region_ind) const;
+
+  // Requires that the caller hold a lock preventing parallel modifying
+  // operations, and that the table be less than completely full.  If
+  // an entry for "region_ind" is already in the table, finds it and
+  // returns its address; otherwise allocates, initializes, inserts and
+  // returns a new entry for "region_ind".
+  SparsePRTEntry* entry_for_region_ind_create(short region_ind);
+
+  // Returns the index of the next free entry in "_entries".
+  short alloc_entry();
+  // Declares the entry "fi" to be free.  (It must have already been
+  // deleted from any bucket lists.)
+  void free_entry(short fi);
+
+public:
+  RSHashTable(size_t capacity);
+  ~RSHashTable();
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  bool get_cards(short region_id, short* cards);
+  bool delete_entry(short region_id);
+
+  bool contains_card(short region_id, short card_index) const;
+
+  void add_entry(SparsePRTEntry* e);
+
+  void clear();
+
+  size_t capacity() const      { return _capacity; }
+  size_t capacity_mask() const { return _capacity_mask; }
+  size_t occupied_entries() const { return _occupied_entries; }
+  size_t occupied_cards() const   { return _occupied_cards; }
+  size_t mem_size() const;
+  bool deleted() { return _deleted; }
+
+  SparsePRTEntry* entry(int i) const { return &_entries[i]; }
+
+  void print();
+
+  static void add_to_deleted_list(RSHashTable* rsht);
+  static RSHashTable* get_from_deleted_list();
+
+
+};
+
+  // ValueObj because will be embedded in HRRS iterator.
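+// Iterates over the cards recorded in an RSHashTable, yielding absolute card
+// indices of the form
+//   _heap_bot_card_ind + region_index * CardsPerRegion + card_offset.
+// A minimal usage sketch ("rsht" stands for any RSHashTable being read and
+// "process_card" for caller code; both are placeholders):
+//   RSHashTableIter iter(heap_bot_card_ind);
+//   iter.init(rsht);
+//   size_t card_index;
+//   while (iter.has_next(card_index)) process_card(card_index);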
+class RSHashTableIter: public CHeapObj {
+  short _tbl_ind;
+  short _bl_ind;
+  short _card_ind;
+  RSHashTable* _rsht;
+  size_t _heap_bot_card_ind;
+
+  enum SomePrivateConstants {
+    CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+  };
+
+  // If the bucket list pointed to by _bl_ind contains a card, sets
+  // _bl_ind to the index of that entry, and returns the card.
+  // Otherwise, returns SparsePRTEntry::NullEntry.
+  short find_first_card_in_list();
+  // Computes the proper card index for the card whose offset in the
+  // current region (as indicated by _bl_ind) is "ci".
+  // This is subject to errors when there is iteration concurrent with
+  // modification, but these errors should be benign.
+  size_t compute_card_ind(short ci);
+
+ public:
+  RSHashTableIter(size_t heap_bot_card_ind) :
+    _tbl_ind(RSHashTable::NullEntry),
+    _bl_ind(RSHashTable::NullEntry),
+    _card_ind((SparsePRTEntry::CardsPerEntry-1)),
+    _rsht(NULL),
+    _heap_bot_card_ind(heap_bot_card_ind)
+  {}
+
+  void init(RSHashTable* rsht) {
+    _rsht = rsht;
+    _tbl_ind = -1; // So that first increment gets to 0.
+    _bl_ind = RSHashTable::NullEntry;
+    _card_ind = (SparsePRTEntry::CardsPerEntry-1);
+  }
+
+  bool has_next(size_t& card_index);
+
+  };
+
+// Concurrent access to a SparsePRT must be serialized by some external
+// mutex.
+
+class SparsePRTIter;
+
+class SparsePRT : public CHeapObj {
+  // Iterations are done on the _cur hash table, since they only need to
+  // see entries visible at the start of a collection pause.
+  // All other operations are done using the _next hash table.
+  RSHashTable* _cur;
+  RSHashTable* _next;
+
+  HeapRegion* _hr;
+
+  enum SomeAdditionalPrivateConstants {
+    InitialCapacity = 16
+  };
+
+  void expand();
+
+  bool _expanded;
+
+  bool expanded() { return _expanded; }
+  void set_expanded(bool b) { _expanded = b; }
+
+  SparsePRT* _next_expanded;
+
+  SparsePRT* next_expanded() { return _next_expanded; }
+  void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
+
+
+  static SparsePRT* _head_expanded_list;
+
+public:
+  SparsePRT(HeapRegion* hr);
+
+  ~SparsePRT();
+
+  size_t occupied() const { return _next->occupied_cards(); }
+  size_t mem_size() const;
+
+  // Attempts to ensure that the given card_index in the given region is in
+  // the sparse table.  If successful (because the card was already
+  // present, or because it was successfully added) returns "true".
+  // Otherwise, returns "false" to indicate that the addition would
+  // overflow the entry for the region.  The caller must transfer these
+  // entries to a larger-capacity representation.
+  bool add_card(short region_id, short card_index);
+
+  // If the table holds an entry for "region_ind", copies its
+  // cards into "cards", which must be an array of length at least
+  // "CardsPerEntry", and returns "true"; otherwise, returns "false".
+  bool get_cards(short region_ind, short* cards);
+
+  // If there is an entry for "region_ind", removes it and returns "true";
+  // otherwise returns "false."
+  bool delete_entry(short region_ind);
+
+  // Clear the table, and reinitialize to initial capacity.
+  void clear();
+
+  // Ensure that "_cur" and "_next" point to the same table.
+  void cleanup();
+
+  // Clean up all tables on the expanded list.  Called single threaded.
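+  // For each SparsePRT on the expanded list this makes "_cur" and "_next"
+  // agree again, and then frees every RSHashTable queued on the deleted list.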
+ static void cleanup_all(); + RSHashTable* next() const { return _next; } + + + void init_iterator(SparsePRTIter* sprt_iter); + + static void add_to_expanded_list(SparsePRT* sprt); + static SparsePRT* get_from_expanded_list(); + + bool contains_card(short region_id, short card_index) const { + return _next->contains_card(region_id, card_index); + } + +#if 0 + void verify_is_cleared(); + void print(); +#endif +}; + + +class SparsePRTIter: public /* RSHashTable:: */RSHashTableIter { +public: + SparsePRTIter(size_t heap_bot_card_ind) : + /* RSHashTable:: */RSHashTableIter(heap_bot_card_ind) + {} + + void init(const SparsePRT* sprt) { + RSHashTableIter::init(sprt->next()); + } + bool has_next(size_t& card_index) { + return RSHashTableIter::has_next(card_index); + } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/survRateGroup.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,264 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_survRateGroup.cpp.incl" + +SurvRateGroup::SurvRateGroup(G1CollectorPolicy* g1p, + const char* name, + size_t summary_surv_rates_len) : + _g1p(g1p), _name(name), + _all_regions_allocated(0), + _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0), + _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL), + _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0), + _summary_surv_rates_len(summary_surv_rates_len), + _summary_surv_rates_max_len(0), + _summary_surv_rates(NULL) { + + // the following will set up the arrays with length 1 + _curr_length = 1; + stop_adding_regions(); + guarantee( _array_length == 1, "invariant" ); + guarantee( _surv_rate_pred[0] != NULL, "invariant" ); + _surv_rate_pred[0]->add(0.4); + all_surviving_words_recorded(false); + _curr_length = 0; + + if (summary_surv_rates_len > 0) { + size_t length = summary_surv_rates_len; + _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length); + if (_summary_surv_rates == NULL) { + vm_exit_out_of_memory(sizeof(NumberSeq*) * length, + "Not enough space for surv rate summary"); + } + for (size_t i = 0; i < length; ++i) + _summary_surv_rates[i] = new NumberSeq(); + } + + start_adding_regions(); +} + +void +SurvRateGroup::start_adding_regions() { + _setup_seq_num = _array_length; + _curr_length = _scan_only_prefix; + _accum_surv_rate = 0.0; + +#if 0 + gclog_or_tty->print_cr("start adding regions, seq num %d, length %d", + _setup_seq_num, _curr_length); +#endif // 0 +} + +void +SurvRateGroup::stop_adding_regions() { + size_t length = _curr_length; + +#if 0 + gclog_or_tty->print_cr("stop adding regions, length %d", length); +#endif // 0 + + if (length > _array_length) { + double* old_surv_rate = _surv_rate; + double* old_accum_surv_rate_pred = _accum_surv_rate_pred; + TruncatedSeq** old_surv_rate_pred = _surv_rate_pred; + + _surv_rate = NEW_C_HEAP_ARRAY(double, length); + if (_surv_rate == NULL) { + vm_exit_out_of_memory(sizeof(double) * length, + "Not enough space for surv rate array."); + } + _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length); + if (_accum_surv_rate_pred == NULL) { + vm_exit_out_of_memory(sizeof(double) * length, + "Not enough space for accum surv rate pred array."); + } + _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length); + if (_surv_rate == NULL) { + vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length, + "Not enough space for surv rate pred array."); + } + + for (size_t i = 0; i < _array_length; ++i) + _surv_rate_pred[i] = old_surv_rate_pred[i]; + +#if 0 + gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d", + _array_length, length - 1); +#endif // 0 + + for (size_t i = _array_length; i < length; ++i) { + _surv_rate_pred[i] = new TruncatedSeq(10); + // _surv_rate_pred[i]->add(last_pred); + } + + _array_length = length; + + if (old_surv_rate != NULL) + FREE_C_HEAP_ARRAY(double, old_surv_rate); + if (old_accum_surv_rate_pred != NULL) + FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred); + if (old_surv_rate_pred != NULL) + FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred); + } + + for (size_t i = 0; i < _array_length; ++i) + _surv_rate[i] = 0.0; +} + +double +SurvRateGroup::accum_surv_rate(size_t adjustment) { + // we might relax this one in the future... 
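+  // With "adjustment" == 1 the returned value also includes the predicted
+  // survival rate of the age group at index _curr_length + 1.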
+ guarantee( adjustment == 0 || adjustment == 1, "pre-condition" ); + + double ret = _accum_surv_rate; + if (adjustment > 0) { + TruncatedSeq* seq = get_seq(_curr_length+1); + double surv_rate = _g1p->get_new_prediction(seq); + ret += surv_rate; + } + + return ret; +} + +int +SurvRateGroup::next_age_index() { + TruncatedSeq* seq = get_seq(_curr_length); + double surv_rate = _g1p->get_new_prediction(seq); + _accum_surv_rate += surv_rate; + + ++_curr_length; + return (int) ++_all_regions_allocated; +} + +void +SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) { + guarantee( scan_only_prefix <= _curr_length, "pre-condition" ); + _scan_only_prefix = scan_only_prefix; +} + +void +SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) { + guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length, + "pre-condition" ); + guarantee( _surv_rate[age_in_group] <= 0.00001, + "should only update each slot once" ); + + double surv_rate = (double) surv_words / (double) HeapRegion::GrainWords; + _surv_rate[age_in_group] = surv_rate; + _surv_rate_pred[age_in_group]->add(surv_rate); + if ((size_t)age_in_group < _summary_surv_rates_len) { + _summary_surv_rates[age_in_group]->add(surv_rate); + if ((size_t)(age_in_group+1) > _summary_surv_rates_max_len) + _summary_surv_rates_max_len = age_in_group+1; + } +} + +void +SurvRateGroup::all_surviving_words_recorded(bool propagate) { + if (propagate && _curr_length > 0) { // conservative + double surv_rate = _surv_rate_pred[_curr_length-1]->last(); + +#if 0 + gclog_or_tty->print_cr("propagating %1.2lf from %d to %d", + surv_rate, _curr_length, _array_length - 1); +#endif // 0 + + for (size_t i = _curr_length; i < _array_length; ++i) { + guarantee( _surv_rate[i] <= 0.00001, + "the slot should not have been updated" ); + _surv_rate_pred[i]->add(surv_rate); + } + } + + double accum = 0.0; + double pred = 0.0; + for (size_t i = 0; i < _array_length; ++i) { + pred = _g1p->get_new_prediction(_surv_rate_pred[i]); + if (pred > 1.0) pred = 1.0; + accum += pred; + _accum_surv_rate_pred[i] = accum; + // gclog_or_tty->print_cr("age %3d, accum %10.2lf", i, accum); + } + _last_pred = pred; +} + +#ifndef PRODUCT +void +SurvRateGroup::print() { + gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)", + _name, _curr_length, _scan_only_prefix); + for (size_t i = 0; i < _curr_length; ++i) { + gclog_or_tty->print_cr(" age %4d surv rate %6.2lf %% pred %6.2lf %%%s", + i, _surv_rate[i] * 100.0, + _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0, + (i < _scan_only_prefix) ? 
" S-O" : " "); + } +} + +void +SurvRateGroup::print_surv_rate_summary() { + size_t length = _summary_surv_rates_max_len; + if (length == 0) + return; + + gclog_or_tty->print_cr(""); + gclog_or_tty->print_cr("%s Rate Summary (for up to age %d)", _name, length-1); + gclog_or_tty->print_cr(" age range survival rate (avg) samples (avg)"); + gclog_or_tty->print_cr(" ---------------------------------------------------------"); + + size_t index = 0; + size_t limit = MIN2((int) length, 10); + while (index < limit) { + gclog_or_tty->print_cr(" %4d %6.2lf%% %6.2lf", + index, _summary_surv_rates[index]->avg() * 100.0, + (double) _summary_surv_rates[index]->num()); + ++index; + } + + gclog_or_tty->print_cr(" ---------------------------------------------------------"); + + int num = 0; + double sum = 0.0; + int samples = 0; + while (index < length) { + ++num; + sum += _summary_surv_rates[index]->avg() * 100.0; + samples += _summary_surv_rates[index]->num(); + ++index; + + if (index == length || num % 10 == 0) { + gclog_or_tty->print_cr(" %4d .. %4d %6.2lf%% %6.2lf", + (index-1) / 10 * 10, index-1, sum / (double) num, + (double) samples / (double) num); + sum = 0.0; + num = 0; + samples = 0; + } + } + + gclog_or_tty->print_cr(" ---------------------------------------------------------"); +} +#endif // PRODUCT diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/survRateGroup.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,102 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +class G1CollectorPolicy; + +class SurvRateGroup : public CHeapObj { +private: + G1CollectorPolicy* _g1p; + const char* _name; + + size_t _array_length; + double* _surv_rate; + double* _accum_surv_rate_pred; + double _last_pred; + double _accum_surv_rate; + TruncatedSeq** _surv_rate_pred; + NumberSeq** _summary_surv_rates; + size_t _summary_surv_rates_len; + size_t _summary_surv_rates_max_len; + + int _all_regions_allocated; + size_t _curr_length; + size_t _scan_only_prefix; + size_t _setup_seq_num; + +public: + SurvRateGroup(G1CollectorPolicy* g1p, + const char* name, + size_t summary_surv_rates_len); + void start_adding_regions(); + void stop_adding_regions(); + void record_scan_only_prefix(size_t scan_only_prefix); + void record_surviving_words(int age_in_group, size_t surv_words); + void all_surviving_words_recorded(bool propagate); + const char* name() { return _name; } + + size_t region_num() { return _curr_length; } + size_t scan_only_length() { return _scan_only_prefix; } + double accum_surv_rate_pred(int age) { + assert(age >= 0, "must be"); + if ((size_t)age < _array_length) + return _accum_surv_rate_pred[age]; + else { + double diff = (double) (age - _array_length + 1); + return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred; + } + } + + double accum_surv_rate(size_t adjustment); + + TruncatedSeq* get_seq(size_t age) { + guarantee( 0 <= age, "pre-condition" ); + if (age >= _setup_seq_num) { + guarantee( _setup_seq_num > 0, "invariant" ); + age = _setup_seq_num-1; + } + TruncatedSeq* seq = _surv_rate_pred[age]; + guarantee( seq != NULL, "invariant" ); + return seq; + } + + int next_age_index(); + int age_in_group(int age_index) { + int ret = (int) (_all_regions_allocated - age_index); + assert( ret >= 0, "invariant" ); + return ret; + } + int recalculate_age_index(int age_index) { + int new_age_index = (int) _scan_only_prefix - age_in_group(age_index); + guarantee( new_age_index >= 0, "invariant" ); + return new_age_index; + } + void finished_recalculating_age_indexes() { + _all_regions_allocated = (int) _scan_only_prefix; + } + +#ifndef PRODUCT + void print(); + void print_surv_rate_summary(); +#endif // PRODUCT +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/vm_operations_g1.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,79 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +#include "incls/_precompiled.incl" +#include "incls/_vm_operations_g1.cpp.incl" + +void VM_G1CollectForAllocation::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + _res = g1h->satisfy_failed_allocation(_size); + assert(g1h->is_in_or_null(_res), "result not in heap"); +} + +void VM_G1CollectFull::doit() { + JvmtiGCFullMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + GCCauseSetter x(g1h, _gc_cause); + g1h->do_full_collection(false /* clear_all_soft_refs */); +} + +void VM_G1IncCollectionPause::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause); + g1h->do_collection_pause_at_safepoint(NULL); +} + +void VM_G1PopRegionCollectionPause::doit() { + JvmtiGCForAllocationMarker jgcm; + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + g1h->do_collection_pause_at_safepoint(_pop_region); +} + + +void VM_CGC_Operation::doit() { + gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); + TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); + TraceTime t(_printGCMessage, PrintGC, true, gclog_or_tty); + SharedHeap* sh = SharedHeap::heap(); + // This could go away if CollectedHeap gave access to _gc_is_active... + if (sh != NULL) { + IsGCActiveMark x; + _cl->do_void(); + } else { + _cl->do_void(); + } +} + +bool VM_CGC_Operation::doit_prologue() { + Heap_lock->lock(); + SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true; + return true; +} + +void VM_CGC_Operation::doit_epilogue() { + SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false; + Heap_lock->unlock(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/g1/vm_operations_g1.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,114 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// VM_operations for the G1 collector. 
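+// Each of these operations is executed in the VM thread via its doit()
+// method; the intended hierarchy is: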
+// VM_GC_Operation: +// - VM_CGC_Operation +// - VM_G1CollectFull +// - VM_G1CollectForAllocation +// - VM_G1IncCollectionPause +// - VM_G1PopRegionCollectionPause + +class VM_G1CollectFull: public VM_GC_Operation { + private: + public: + VM_G1CollectFull(int gc_count_before, + GCCause::Cause gc_cause) + : VM_GC_Operation(gc_count_before) + { + _gc_cause = gc_cause; + } + ~VM_G1CollectFull() {} + virtual VMOp_Type type() const { return VMOp_G1CollectFull; } + virtual void doit(); + virtual const char* name() const { + return "full garbage-first collection"; + } +}; + +class VM_G1CollectForAllocation: public VM_GC_Operation { + private: + HeapWord* _res; + size_t _size; // size of object to be allocated + public: + VM_G1CollectForAllocation(size_t size, int gc_count_before) + : VM_GC_Operation(gc_count_before) { + _size = size; + _res = NULL; + } + ~VM_G1CollectForAllocation() {} + virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first collection to satisfy allocation"; + } + HeapWord* result() { return _res; } +}; + +class VM_G1IncCollectionPause: public VM_GC_Operation { + public: + VM_G1IncCollectionPause(int gc_count_before) : + VM_GC_Operation(gc_count_before) {} + virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first incremental collection pause"; + } +}; + +class VM_G1PopRegionCollectionPause: public VM_GC_Operation { + HeapRegion* _pop_region; + public: + VM_G1PopRegionCollectionPause(int gc_count_before, HeapRegion* pop_region) : + VM_GC_Operation(gc_count_before), + _pop_region(pop_region) + {} + virtual VMOp_Type type() const { return VMOp_G1PopRegionCollectionPause; } + virtual void doit(); + virtual const char* name() const { + return "garbage-first popular region collection pause"; + } +}; + +// Concurrent GC stop-the-world operations such as initial and final mark; +// consider sharing these with CMS's counterparts. 
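+// VM_CGC_Operation's prologue acquires the Heap_lock on behalf of the
+// concurrent GC thread and its epilogue releases it, so the closure invoked
+// from doit() runs while the heap is locked.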
+class VM_CGC_Operation: public VM_Operation { + VoidClosure* _cl; + const char* _printGCMessage; + public: + VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg) : + _cl(cl), + _printGCMessage(printGCMsg) + {} + + ~VM_CGC_Operation() {} + + virtual VMOp_Type type() const { return VMOp_CGC_Operation; } + virtual void doit(); + virtual bool doit_prologue(); + virtual void doit_epilogue(); + virtual const char* name() const { + return "concurrent gc"; + } +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep --- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep Thu Jun 05 15:57:56 2008 -0700 @@ -123,17 +123,6 @@ compactingPermGenGen.cpp concurrentMarkSweepGeneration.inline.hpp -concurrentGCThread.cpp concurrentGCThread.hpp -concurrentGCThread.cpp init.hpp -concurrentGCThread.cpp instanceRefKlass.hpp -concurrentGCThread.cpp interfaceSupport.hpp -concurrentGCThread.cpp java.hpp -concurrentGCThread.cpp javaCalls.hpp -concurrentGCThread.cpp oop.inline.hpp -concurrentGCThread.cpp systemDictionary.hpp - -concurrentGCThread.hpp thread.hpp - concurrentMarkSweepGeneration.cpp cardTableRS.hpp concurrentMarkSweepGeneration.cpp cmsAdaptiveSizePolicy.hpp concurrentMarkSweepGeneration.cpp cmsCollectorPolicy.hpp @@ -165,7 +154,7 @@ concurrentMarkSweepGeneration.cpp vmCMSOperations.hpp concurrentMarkSweepGeneration.cpp vmThread.hpp -concurrentMarkSweepGeneration.hpp bitMap.hpp +concurrentMarkSweepGeneration.hpp bitMap.inline.hpp concurrentMarkSweepGeneration.hpp freeBlockDictionary.hpp concurrentMarkSweepGeneration.hpp gSpaceCounters.hpp concurrentMarkSweepGeneration.hpp gcStats.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_g1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,349 @@ +// +// Copyright 2004-2006 Sun Microsystems, Inc. All Rights Reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +// CA 95054 USA or visit www.sun.com if you need additional information or +// have any questions. +// +// + +// NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! 
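+// Each non-comment line below pairs a file with one of the files it depends
+// on; makeDeps consumes these pairs to generate the build dependencies for
+// the new G1 sources.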
+ +bufferingOopClosure.hpp genOopClosures.hpp +bufferingOopClosure.hpp generation.hpp +bufferingOopClosure.hpp os.hpp + +cardTableRS.cpp concurrentMark.hpp +cardTableRS.cpp g1SATBCardTableModRefBS.hpp + +collectionSetChooser.cpp g1CollectedHeap.hpp +collectionSetChooser.cpp g1CollectorPolicy.hpp +collectionSetChooser.cpp collectionSetChooser.hpp + +collectionSetChooser.hpp heapRegion.hpp +collectionSetChooser.hpp growableArray.hpp + +concurrentG1Refine.cpp atomic.hpp +concurrentG1Refine.cpp concurrentG1Refine.hpp +concurrentG1Refine.cpp concurrentG1RefineThread.hpp +concurrentG1Refine.cpp copy.hpp +concurrentG1Refine.cpp g1CollectedHeap.hpp +concurrentG1Refine.cpp g1RemSet.hpp + +concurrentG1Refine.hpp globalDefinitions.hpp + +concurrentG1RefineThread.cpp concurrentG1Refine.hpp +concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp +concurrentG1RefineThread.cpp g1CollectedHeap.hpp +concurrentG1RefineThread.cpp g1CollectorPolicy.hpp +concurrentG1RefineThread.cpp handles.inline.hpp +concurrentG1RefineThread.cpp mutexLocker.hpp +concurrentG1RefineThread.cpp resourceArea.hpp + +concurrentG1RefineThread.hpp concurrentGCThread.hpp +concurrentG1RefineThread.hpp coTracker.hpp + +concurrentMark.cpp concurrentMark.hpp +concurrentMark.cpp concurrentMarkThread.inline.hpp +concurrentMark.cpp g1CollectedHeap.inline.hpp +concurrentMark.cpp g1CollectorPolicy.hpp +concurrentMark.cpp g1RemSet.hpp +concurrentMark.cpp gcOverheadReporter.hpp +concurrentMark.cpp genOopClosures.inline.hpp +concurrentMark.cpp heapRegionRemSet.hpp +concurrentMark.cpp heapRegionSeq.inline.hpp +concurrentMark.cpp handles.inline.hpp +concurrentMark.cpp java.hpp +concurrentMark.cpp oop.inline.hpp +concurrentMark.cpp referencePolicy.hpp +concurrentMark.cpp resourceArea.hpp +concurrentMark.cpp symbolTable.hpp + +concurrentMark.hpp coTracker.hpp +concurrentMark.hpp heapRegion.hpp +concurrentMark.hpp taskqueue.hpp + +concurrentMarkThread.cpp concurrentMarkThread.inline.hpp +concurrentMarkThread.cpp g1CollectedHeap.inline.hpp +concurrentMarkThread.cpp g1CollectorPolicy.hpp +concurrentMarkThread.cpp g1MMUTracker.hpp +concurrentMarkThread.cpp resourceArea.hpp +concurrentMarkThread.cpp vm_operations_g1.hpp +concurrentMarkThread.cpp vmThread.hpp + +concurrentMarkThread.hpp concurrentGCThread.hpp + +concurrentMarkThread.inline.hpp concurrentMark.hpp +concurrentMarkThread.inline.hpp concurrentMarkThread.hpp + +concurrentZFThread.cpp concurrentZFThread.hpp +concurrentZFThread.cpp heapRegion.hpp +concurrentZFThread.cpp g1CollectedHeap.inline.hpp +concurrentZFThread.cpp copy.hpp +concurrentZFThread.cpp mutexLocker.hpp +concurrentZFThread.cpp space.inline.hpp + +concurrentZFThread.hpp concurrentGCThread.hpp +concurrentZFThread.hpp coTracker.hpp + +dirtyCardQueue.cpp atomic.hpp +dirtyCardQueue.cpp dirtyCardQueue.hpp +dirtyCardQueue.cpp heapRegionRemSet.hpp +dirtyCardQueue.cpp mutexLocker.hpp +dirtyCardQueue.cpp ptrQueue.inline.hpp +dirtyCardQueue.cpp safepoint.hpp +dirtyCardQueue.cpp thread.hpp +dirtyCardQueue.cpp thread_.inline.hpp +dirtyCardQueue.cpp workgroup.hpp + +dirtyCardQueue.hpp allocation.hpp +dirtyCardQueue.hpp ptrQueue.hpp + +g1BlockOffsetTable.cpp g1BlockOffsetTable.inline.hpp +g1BlockOffsetTable.cpp java.hpp +g1BlockOffsetTable.cpp oop.inline.hpp +g1BlockOffsetTable.cpp space.hpp + +g1BlockOffsetTable.hpp globalDefinitions.hpp +g1BlockOffsetTable.hpp memRegion.hpp +g1BlockOffsetTable.hpp virtualspace.hpp + +g1BlockOffsetTable.inline.hpp g1BlockOffsetTable.hpp +g1BlockOffsetTable.inline.hpp space.hpp + +g1CollectedHeap.cpp 
aprofiler.hpp +g1CollectedHeap.cpp bufferingOopClosure.hpp +g1CollectedHeap.cpp concurrentG1Refine.hpp +g1CollectedHeap.cpp concurrentG1RefineThread.hpp +g1CollectedHeap.cpp concurrentMarkThread.inline.hpp +g1CollectedHeap.cpp concurrentZFThread.hpp +g1CollectedHeap.cpp g1CollectedHeap.inline.hpp +g1CollectedHeap.cpp g1CollectorPolicy.hpp +g1CollectedHeap.cpp g1MarkSweep.hpp +g1CollectedHeap.cpp g1RemSet.hpp +g1CollectedHeap.cpp g1OopClosures.inline.hpp +g1CollectedHeap.cpp genOopClosures.inline.hpp +g1CollectedHeap.cpp gcLocker.inline.hpp +g1CollectedHeap.cpp gcOverheadReporter.hpp +g1CollectedHeap.cpp generationSpec.hpp +g1CollectedHeap.cpp heapRegionRemSet.hpp +g1CollectedHeap.cpp heapRegionSeq.inline.hpp +g1CollectedHeap.cpp icBuffer.hpp +g1CollectedHeap.cpp isGCActiveMark.hpp +g1CollectedHeap.cpp oop.inline.hpp +g1CollectedHeap.cpp oop.pcgc.inline.hpp +g1CollectedHeap.cpp parGCAllocBuffer.hpp +g1CollectedHeap.cpp vm_operations_g1.hpp +g1CollectedHeap.cpp vmThread.hpp + +g1CollectedHeap.hpp barrierSet.hpp +g1CollectedHeap.hpp heapRegion.hpp +g1CollectedHeap.hpp memRegion.hpp +g1CollectedHeap.hpp sharedHeap.hpp + +g1CollectedHeap.inline.hpp concurrentMark.hpp +g1CollectedHeap.inline.hpp g1CollectedHeap.hpp +g1CollectedHeap.inline.hpp heapRegionSeq.hpp +g1CollectedHeap.inline.hpp taskqueue.hpp + +g1CollectorPolicy.cpp concurrentG1Refine.hpp +g1CollectorPolicy.cpp concurrentMark.hpp +g1CollectorPolicy.cpp concurrentMarkThread.inline.hpp +g1CollectorPolicy.cpp debug.hpp +g1CollectorPolicy.cpp java.hpp +g1CollectorPolicy.cpp g1CollectedHeap.hpp +g1CollectorPolicy.cpp g1CollectorPolicy.hpp +g1CollectorPolicy.cpp heapRegionRemSet.hpp +g1CollectorPolicy.cpp mutexLocker.hpp + +g1CollectorPolicy.hpp collectorPolicy.hpp +g1CollectorPolicy.hpp collectionSetChooser.hpp +g1CollectorPolicy.hpp g1MMUTracker.hpp + +g1_globals.cpp g1_globals.hpp + +g1_globals.hpp globals.hpp + +globals.cpp g1_globals.hpp +top.hpp g1_globals.hpp + +g1MarkSweep.cpp aprofiler.hpp +g1MarkSweep.cpp codeCache.hpp +g1MarkSweep.cpp events.hpp +g1MarkSweep.cpp fprofiler.hpp +g1MarkSweep.hpp g1CollectedHeap.hpp +g1MarkSweep.cpp g1MarkSweep.hpp +g1MarkSweep.cpp gcLocker.hpp +g1MarkSweep.cpp genCollectedHeap.hpp +g1MarkSweep.hpp heapRegion.hpp +g1MarkSweep.cpp icBuffer.hpp +g1MarkSweep.cpp instanceRefKlass.hpp +g1MarkSweep.cpp javaClasses.hpp +g1MarkSweep.cpp jvmtiExport.hpp +g1MarkSweep.cpp copy.hpp +g1MarkSweep.cpp modRefBarrierSet.hpp +g1MarkSweep.cpp oop.inline.hpp +g1MarkSweep.cpp referencePolicy.hpp +g1MarkSweep.cpp space.hpp +g1MarkSweep.cpp symbolTable.hpp +g1MarkSweep.cpp synchronizer.hpp +g1MarkSweep.cpp systemDictionary.hpp +g1MarkSweep.cpp thread.hpp +g1MarkSweep.cpp vmSymbols.hpp +g1MarkSweep.cpp vmThread.hpp + +g1MarkSweep.hpp generation.hpp +g1MarkSweep.hpp growableArray.hpp +g1MarkSweep.hpp markOop.hpp +g1MarkSweep.hpp genMarkSweep.hpp +g1MarkSweep.hpp oop.hpp +g1MarkSweep.hpp timer.hpp +g1MarkSweep.hpp universe.hpp + +g1OopClosures.inline.hpp concurrentMark.hpp +g1OopClosures.inline.hpp g1OopClosures.hpp +g1OopClosures.inline.hpp g1CollectedHeap.hpp +g1OopClosures.inline.hpp g1RemSet.hpp + +g1MMUTracker.cpp g1MMUTracker.hpp +g1MMUTracker.cpp ostream.hpp +g1MMUTracker.cpp mutexLocker.hpp + +g1MMUTracker.hpp debug.hpp + +g1RemSet.cpp bufferingOopClosure.hpp +g1RemSet.cpp concurrentG1Refine.hpp +g1RemSet.cpp concurrentG1RefineThread.hpp +g1RemSet.cpp g1BlockOffsetTable.inline.hpp +g1RemSet.cpp g1CollectedHeap.inline.hpp +g1RemSet.cpp g1CollectorPolicy.hpp +g1RemSet.cpp g1RemSet.inline.hpp +g1RemSet.cpp 
g1OopClosures.inline.hpp +g1RemSet.cpp heapRegionSeq.inline.hpp +g1RemSet.cpp intHisto.hpp +g1RemSet.cpp iterator.hpp +g1RemSet.cpp oop.inline.hpp + +g1RemSet.inline.hpp g1RemSet.hpp +g1RemSet.inline.hpp heapRegionRemSet.hpp + +g1SATBCardTableModRefBS.cpp g1SATBCardTableModRefBS.hpp +g1SATBCardTableModRefBS.cpp heapRegion.hpp +g1SATBCardTableModRefBS.cpp mutexLocker.hpp +g1SATBCardTableModRefBS.cpp thread.hpp +g1SATBCardTableModRefBS.cpp thread_.inline.hpp +g1SATBCardTableModRefBS.cpp satbQueue.hpp + +g1SATBCardTableModRefBS.hpp cardTableModRefBS.hpp +g1SATBCardTableModRefBS.hpp memRegion.hpp + +heapRegion.cpp concurrentZFThread.hpp +heapRegion.cpp g1BlockOffsetTable.inline.hpp +heapRegion.cpp g1CollectedHeap.inline.hpp +heapRegion.cpp g1OopClosures.inline.hpp +heapRegion.cpp genOopClosures.inline.hpp +heapRegion.cpp heapRegion.inline.hpp +heapRegion.cpp heapRegionRemSet.hpp +heapRegion.cpp heapRegionSeq.inline.hpp +heapRegion.cpp iterator.hpp +heapRegion.cpp oop.inline.hpp + +heapRegion.hpp space.hpp +heapRegion.hpp g1BlockOffsetTable.hpp +heapRegion.hpp watermark.hpp +heapRegion.hpp g1_specialized_oop_closures.hpp +heapRegion.hpp survRateGroup.hpp + +heapRegionRemSet.hpp sparsePRT.hpp + +heapRegionRemSet.cpp allocation.hpp +heapRegionRemSet.cpp bitMap.inline.hpp +heapRegionRemSet.cpp g1BlockOffsetTable.inline.hpp +heapRegionRemSet.cpp g1CollectedHeap.inline.hpp +heapRegionRemSet.cpp heapRegionRemSet.hpp +heapRegionRemSet.cpp heapRegionSeq.inline.hpp +heapRegionRemSet.cpp globalDefinitions.hpp +heapRegionRemSet.cpp space.inline.hpp + +heapRegionSeq.cpp allocation.hpp +heapRegionSeq.cpp g1CollectedHeap.hpp +heapRegionSeq.cpp heapRegionSeq.hpp + +heapRegionSeq.hpp growableArray.hpp +heapRegionSeq.hpp heapRegion.hpp + +heapRegionSeq.inline.hpp heapRegionSeq.hpp + +klass.hpp g1OopClosures.hpp + +ptrQueue.cpp allocation.hpp +ptrQueue.cpp allocation.inline.hpp +ptrQueue.cpp mutex.hpp +ptrQueue.cpp mutexLocker.hpp +ptrQueue.cpp ptrQueue.hpp +ptrQueue.cpp ptrQueue.inline.hpp +ptrQueue.cpp thread_.inline.hpp + +ptrQueue.hpp allocation.hpp +ptrQueue.hpp sizes.hpp + +ptrQueue.inline.hpp ptrQueue.hpp + +satbQueue.cpp allocation.inline.hpp +satbQueue.cpp mutexLocker.hpp +satbQueue.cpp ptrQueue.inline.hpp +satbQueue.cpp satbQueue.hpp +satbQueue.cpp sharedHeap.hpp +satbQueue.cpp thread.hpp + +satbQueue.hpp ptrQueue.hpp + +sparsePRT.cpp allocation.inline.hpp +sparsePRT.cpp cardTableModRefBS.hpp +sparsePRT.cpp heapRegion.hpp +sparsePRT.cpp heapRegionRemSet.hpp +sparsePRT.cpp mutexLocker.hpp +sparsePRT.cpp sparsePRT.hpp +sparsePRT.cpp space.inline.hpp + +sparsePRT.hpp allocation.hpp +sparsePRT.hpp cardTableModRefBS.hpp +sparsePRT.hpp globalDefinitions.hpp +sparsePRT.hpp heapRegion.hpp +sparsePRT.hpp mutex.hpp + +specialized_oop_closures.hpp g1_specialized_oop_closures.hpp + +survRateGroup.hpp numberSeq.hpp + +survRateGroup.cpp allocation.hpp +survRateGroup.cpp g1CollectedHeap.hpp +survRateGroup.cpp g1CollectorPolicy.hpp +survRateGroup.cpp heapRegion.hpp +survRateGroup.cpp survRateGroup.hpp + +thread.cpp concurrentMarkThread.inline.hpp + +universe.cpp g1CollectedHeap.hpp +universe.cpp g1CollectorPolicy.hpp + +vm_operations_g1.hpp vmGCOperations.hpp + +vm_operations_g1.cpp vm_operations_g1.hpp +vm_operations_g1.cpp g1CollectedHeap.hpp +vm_operations_g1.cpp isGCActiveMark.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_parallelScavenge --- a/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Wed Jun 04 13:51:09 2008 -0700 +++ 
b/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge Thu Jun 05 15:57:56 2008 -0700 @@ -148,7 +148,6 @@ parallelScavengeHeap.hpp psYoungGen.hpp parallelScavengeHeap.hpp ostream.hpp -parMarkBitMap.cpp bitMap.hpp parMarkBitMap.cpp bitMap.inline.hpp parMarkBitMap.cpp oop.inline.hpp parMarkBitMap.cpp os.hpp @@ -157,7 +156,6 @@ parMarkBitMap.cpp parMarkBitMap.inline.hpp parMarkBitMap.cpp psParallelCompact.hpp -parMarkBitMap.hpp bitMap.hpp parMarkBitMap.hpp bitMap.inline.hpp parMarkBitMap.hpp psVirtualspace.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/includeDB_gc_shared --- a/src/share/vm/gc_implementation/includeDB_gc_shared Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_shared Thu Jun 05 15:57:56 2008 -0700 @@ -24,6 +24,23 @@ // NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! +concurrentGCThread.cpp concurrentGCThread.hpp +concurrentGCThread.cpp init.hpp +concurrentGCThread.cpp instanceRefKlass.hpp +concurrentGCThread.cpp interfaceSupport.hpp +concurrentGCThread.cpp java.hpp +concurrentGCThread.cpp javaCalls.hpp +concurrentGCThread.cpp oop.inline.hpp +concurrentGCThread.cpp systemDictionary.hpp + +concurrentGCThread.hpp thread.hpp + +coTracker.hpp globalDefinitions.hpp +coTracker.hpp numberSeq.hpp + +coTracker.cpp coTracker.hpp +coTracker.cpp os.hpp + allocationStats.cpp allocationStats.hpp allocationStats.cpp ostream.hpp @@ -37,6 +54,13 @@ gcAdaptivePolicyCounters.cpp resourceArea.hpp gcAdaptivePolicyCounters.cpp gcAdaptivePolicyCounters.hpp +gcOverheadReporter.cpp allocation.inline.hpp +gcOverheadReporter.cpp concurrentGCThread.hpp +gcOverheadReporter.cpp coTracker.hpp +gcOverheadReporter.cpp gcOverheadReporter.hpp +gcOverheadReporter.cpp ostream.hpp +gcOverheadReporter.cpp thread_.inline.hpp + gSpaceCounters.cpp generation.hpp gSpaceCounters.cpp resourceArea.hpp gSpaceCounters.cpp gSpaceCounters.hpp @@ -72,3 +96,5 @@ spaceCounters.hpp mutableSpace.hpp spaceCounters.hpp perfData.hpp spaceCounters.hpp generationCounters.hpp + +vmGCOperations.cpp g1CollectedHeap.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -192,16 +192,16 @@ }; inline ParMarkBitMap::ParMarkBitMap(): - _beg_bits(NULL, 0), - _end_bits(NULL, 0) + _beg_bits(), + _end_bits() { _region_start = 0; _virtual_space = 0; } inline ParMarkBitMap::ParMarkBitMap(MemRegion covered_region): - _beg_bits(NULL, 0), - _end_bits(NULL, 0) + _beg_bits(), + _end_bits() { initialize(covered_region); } @@ -325,7 +325,7 @@ inline size_t ParMarkBitMap::obj_size(idx_t beg_bit) const { - const idx_t end_bit = _end_bits.find_next_one_bit(beg_bit, size()); + const idx_t end_bit = _end_bits.get_next_one_offset_inline(beg_bit, size()); assert(is_marked(beg_bit), "obj not marked"); assert(end_bit < size(), "end bit missing"); return obj_size(beg_bit, end_bit); @@ -384,13 +384,13 @@ inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_beg(idx_t beg, idx_t end) const { - return _beg_bits.find_next_one_bit(beg, end); + return _beg_bits.get_next_one_offset_inline_aligned_right(beg, end); } inline ParMarkBitMap::idx_t ParMarkBitMap::find_obj_end(idx_t beg, idx_t end) const { - return _end_bits.find_next_one_bit(beg, end); + return _end_bits.get_next_one_offset_inline_aligned_right(beg, end); } inline 
HeapWord* diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -184,6 +184,20 @@ size_t tlab_capacity(Thread* thr) const; size_t unsafe_max_tlab_alloc(Thread* thr) const; + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + return true; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + return true; + } + void oop_iterate(OopClosure* cl); void object_iterate(ObjectClosure* cl); void permanent_oop_iterate(OopClosure* cl); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/coTracker.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/coTracker.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,189 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_coTracker.cpp.incl" + +COTracker* COTracker::_head = NULL; +double COTracker::_cpu_number = -1.0; + +void +COTracker::resetPeriod(double now_sec, double vnow_sec) { + guarantee( _enabled, "invariant" ); + _period_start_time_sec = now_sec; + _period_start_vtime_sec = vnow_sec; +} + +void +COTracker::setConcOverhead(double time_stamp_sec, + double conc_overhead) { + guarantee( _enabled, "invariant" ); + _conc_overhead = conc_overhead; + _time_stamp_sec = time_stamp_sec; + if (conc_overhead > 0.001) + _conc_overhead_seq.add(conc_overhead); +} + +void +COTracker::reset(double starting_conc_overhead) { + guarantee( _enabled, "invariant" ); + double now_sec = os::elapsedTime(); + setConcOverhead(now_sec, starting_conc_overhead); +} + +void +COTracker::start() { + guarantee( _enabled, "invariant" ); + resetPeriod(os::elapsedTime(), os::elapsedVTime()); +} + +void +COTracker::update(bool force_end) { + assert( _enabled, "invariant" ); + double end_time_sec = os::elapsedTime(); + double elapsed_time_sec = end_time_sec - _period_start_time_sec; + if (force_end || elapsed_time_sec > _update_period_sec) { + // reached the end of the period + double end_vtime_sec = os::elapsedVTime(); + double elapsed_vtime_sec = end_vtime_sec - _period_start_vtime_sec; + + double conc_overhead = elapsed_vtime_sec / elapsed_time_sec; + + setConcOverhead(end_time_sec, conc_overhead); + resetPeriod(end_time_sec, end_vtime_sec); + } +} + +void +COTracker::updateForSTW(double start_sec, double end_sec) { + if (!_enabled) + return; + + // During a STW pause, no concurrent GC thread has done any + // work. So, we can safely adjust the start of the current period by + // adding the duration of the STW pause to it, so that the STW pause + // doesn't affect the reading of the concurrent overhead (it's + // basically like excluding the time of the STW pause from the + // concurrent overhead calculation). + + double stw_duration_sec = end_sec - start_sec; + guarantee( stw_duration_sec > 0.0, "invariant" ); + + if (outOfDate(start_sec)) + _conc_overhead = 0.0; + else + _time_stamp_sec = end_sec; + _period_start_time_sec += stw_duration_sec; + _conc_overhead_seq = NumberSeq(); + + guarantee( os::elapsedTime() > _period_start_time_sec, "invariant" ); +} + +double +COTracker::predConcOverhead() { + if (_enabled) { + // tty->print(" %1.2lf", _conc_overhead_seq.maximum()); + return _conc_overhead_seq.maximum(); + } else { + // tty->print(" DD"); + return 0.0; + } +} + +void +COTracker::resetPred() { + _conc_overhead_seq = NumberSeq(); +} + +COTracker::COTracker(int group) + : _enabled(false), + _group(group), + _period_start_time_sec(-1.0), + _period_start_vtime_sec(-1.0), + _conc_overhead(-1.0), + _time_stamp_sec(-1.0), + _next(NULL) { + // GCOverheadReportingPeriodMS indicates how frequently the + // concurrent overhead will be recorded by the GC Overhead + // Reporter. We want to take readings less often than that. If we + // took readings more often than some of them might be lost. 
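+  // Hence the 1.25 factor below: the update period is made 25% longer than
+  // the reporting period.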
+ _update_period_sec = ((double) GCOverheadReportingPeriodMS) / 1000.0 * 1.25; + _next = _head; + _head = this; + + if (_cpu_number < 0.0) + _cpu_number = (double) os::processor_count(); +} + +// statics + +void +COTracker::updateAllForSTW(double start_sec, double end_sec) { + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + curr->updateForSTW(start_sec, end_sec); + } +} + +double +COTracker::totalConcOverhead(double now_sec) { + double total_conc_overhead = 0.0; + + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + double conc_overhead = curr->concOverhead(now_sec); + total_conc_overhead += conc_overhead; + } + + return total_conc_overhead; +} + +double +COTracker::totalConcOverhead(double now_sec, + size_t group_num, + double* co_per_group) { + double total_conc_overhead = 0.0; + + for (size_t i = 0; i < group_num; ++i) + co_per_group[i] = 0.0; + + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + size_t group = curr->_group; + assert( 0 <= group && group < group_num, "invariant" ); + double conc_overhead = curr->concOverhead(now_sec); + + co_per_group[group] += conc_overhead; + total_conc_overhead += conc_overhead; + } + + return total_conc_overhead; +} + +double +COTracker::totalPredConcOverhead() { + double total_pred_conc_overhead = 0.0; + for (COTracker* curr = _head; curr != NULL; curr = curr->_next) { + total_pred_conc_overhead += curr->predConcOverhead(); + curr->resetPred(); + } + return total_pred_conc_overhead / _cpu_number; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/coTracker.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/coTracker.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,181 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// COTracker keeps track of the concurrent overhead of a GC thread. + +// A thread that needs to be tracked must, itself, start up its +// tracker with the start() method and then call the update() method +// at regular intervals. What the tracker does is to calculate the +// concurrent overhead of a process at a given update period. The +// tracker starts and when is detects that it has exceeded the given +// period, it calculates the duration of the period in wall-clock time +// and the duration of the period in vtime (i.e. how much time the +// concurrent processes really took up during this period). 
The ratio +// of the latter over the former is the concurrent overhead of that +// process for that period over a single CPU. This overhead is stored +// on the tracker, "timestamped" with the wall-clock time of the end +// of the period. When the concurrent overhead of this process needs +// to be queried, this last "reading" provides a good approximation +// (we assume that the concurrent overhead of a particular thread +// stays largely constant over time). The timestamp is necessary to +// detect when the process has stopped working and the recorded +// reading hasn't been updated for some time. + +// Each concurrent GC thread is considered to be part of a "group" +// (i.e. any available concurrent marking threads are part of the +// "concurrent marking thread group"). A COTracker is associated with +// a single group at construction-time. It's up to each collector to +// decide how groups will be mapped to such an id (ids should start +// from 0 and be consecutive; there's a hardcoded max group num +// defined on the GCOverheadTracker class). The notion of a group has +// been introduced to be able to identify how much overhead was +// imposed by each group, instead of getting a single value that +// covers all concurrent overhead. + +class COTracker { +private: + // It indicates whether this tracker is enabled or not. When the + // tracker is disabled, then it returns 0.0 as the latest concurrent + // overhead and several methods (reset, start, and update) are not + // supposed to be called on it. This enabling / disabling facility + // is really provided to make a bit more explicit in the code when a + // particulary tracker of a processes that doesn't run all the time + // (e.g. concurrent marking) is supposed to be used and not it's not. + bool _enabled; + + // The ID of the group associated with this tracker. + int _group; + + // The update period of the tracker. A new value for the concurrent + // overhead of the associated process will be made at intervals no + // smaller than this. + double _update_period_sec; + + // The start times (both wall-block time and vtime) of the current + // interval. + double _period_start_time_sec; + double _period_start_vtime_sec; + + // Number seq of the concurrent overhead readings within a period + NumberSeq _conc_overhead_seq; + + // The latest reading of the concurrent overhead (over a single CPU) + // imposed by the associated concurrent thread, made available at + // the indicated wall-clock time. + double _conc_overhead; + double _time_stamp_sec; + + // The number of CPUs that the host machine has (for convenience + // really, as we'd have to keep translating it into a double) + static double _cpu_number; + + // Fields that keep a list of all trackers created. This is useful, + // since it allows us to sum up the concurrent overhead without + // having to write code for a specific collector to broadcast a + // request to all its concurrent processes. + COTracker* _next; + static COTracker* _head; + + // It indicates that a new period is starting by updating the + // _period_start_time_sec and _period_start_vtime_sec fields. + void resetPeriod(double now_sec, double vnow_sec); + // It updates the latest concurrent overhead reading, taken at a + // given wall-clock time. + void setConcOverhead(double time_stamp_sec, double conc_overhead); + + // It determines whether the time stamp of the latest concurrent + // overhead reading is out of date or not. 
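+  // (A reading older than 1.2x the update period is treated as stale.)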
+ bool outOfDate(double now_sec) { + // The latest reading is considered out of date if it was taken + // more than 1.2x the update period ago. + return (now_sec - _time_stamp_sec) > 1.2 * _update_period_sec; + } + +public: + // The constructor which associates the tracker with a group ID. + COTracker(int group); + + // Methods to enable / disable the tracker and query whether it is enabled. + void enable() { _enabled = true; } + void disable() { _enabled = false; } + bool enabled() { return _enabled; } + + // It resets the tracker and sets the concurrent overhead reading to be + // the given parameter and the associated time stamp to be now. + void reset(double starting_conc_overhead = 0.0); + // The tracker starts tracking. It should only be called from the + // concurrent thread that is tracked by this tracker. + void start(); + // It updates the tracker and, if the current period is longer than + // the update period, the concurrent overhead reading will be + // updated. force_end being true indicates that it's the last call + // to update() by this process before the tracker is disabled (the + // tracker can be re-enabled later if necessary). It should only be + // called from the concurrent thread that is tracked by this tracker + // and while the thread has joined the STS. + void update(bool force_end = false); + // It adjusts the contents of the tracker to take into account an STW + // pause. + void updateForSTW(double start_sec, double end_sec); + + // It returns the last concurrent overhead reading over a single + // CPU. If the reading is out of date, or the tracker is disabled, + // it returns 0.0. + double concCPUOverhead(double now_sec) { + if (!_enabled || outOfDate(now_sec)) + return 0.0; + else + return _conc_overhead; + } + + // It returns the last concurrent overhead reading over all CPUs + // that the host machine has. If the reading is out of date, or the + // tracker is disabled, it returns 0.0. + double concOverhead(double now_sec) { + return concCPUOverhead(now_sec) / _cpu_number; + } + + double predConcOverhead(); + + void resetPred(); + + // statics + + // It notifies all trackers about an STW pause. + static void updateAllForSTW(double start_sec, double end_sec); + + // It returns the sum of the concurrent overhead readings of all + // available (and enabled) trackers for the given time stamp. The + // overhead is over all the CPUs of the host machine. + + static double totalConcOverhead(double now_sec); + // Like the previous method, but it also sums up the overheads per + // group number. The length of the co_per_group array must be at + // least as large as group_num. + static double totalConcOverhead(double now_sec, + size_t group_num, + double* co_per_group); + + static double totalPredConcOverhead(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/concurrentGCThread.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,314 @@ +/* + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// CopyrightVersion 1.2 + +# include "incls/_precompiled.incl" +# include "incls/_concurrentGCThread.cpp.incl" + +bool ConcurrentGCThread::_should_terminate = false; +bool ConcurrentGCThread::_has_terminated = false; +int ConcurrentGCThread::_CGC_flag = CGC_nil; + +SuspendibleThreadSet ConcurrentGCThread::_sts; + +ConcurrentGCThread::ConcurrentGCThread() { + _sts.initialize(); +}; + +void ConcurrentGCThread::stopWorldAndDo(VoidClosure* op) { + MutexLockerEx x(Heap_lock, + Mutex::_no_safepoint_check_flag); + // warning("CGC: about to try stopping world"); + SafepointSynchronize::begin(); + // warning("CGC: successfully stopped world"); + op->do_void(); + SafepointSynchronize::end(); + // warning("CGC: successfully restarted world"); +} + +void ConcurrentGCThread::safepoint_synchronize() { + _sts.suspend_all(); +} + +void ConcurrentGCThread::safepoint_desynchronize() { + _sts.resume_all(); +} + +void ConcurrentGCThread::create_and_start() { + if (os::create_thread(this, os::cgc_thread)) { + // XXX: need to set this to low priority + // unless "aggressive mode" set; priority + // should be just less than that of VMThread. + os::set_priority(this, NearMaxPriority); + if (!_should_terminate && !DisableStartThread) { + os::start_thread(this); + } + } +} + +void ConcurrentGCThread::initialize_in_thread() { + this->record_stack_base_and_size(); + this->initialize_thread_local_storage(); + this->set_active_handles(JNIHandleBlock::allocate_block()); + // From this time Thread::current() should be working. + assert(this == Thread::current(), "just checking"); +} + +void ConcurrentGCThread::wait_for_universe_init() { + MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); + while (!is_init_completed() && !_should_terminate) { + CGC_lock->wait(Mutex::_no_safepoint_check_flag, 200); + } +} + +void ConcurrentGCThread::terminate() { + // Signal that it is terminated + { + MutexLockerEx mu(Terminator_lock, + Mutex::_no_safepoint_check_flag); + _has_terminated = true; + Terminator_lock->notify(); + } + + // Thread destructor usually does this..
+ ThreadLocalStorage::set_thread(NULL); +} + + +void SuspendibleThreadSet::initialize_work() { + MutexLocker x(STS_init_lock); + if (!_initialized) { + _m = new Monitor(Mutex::leaf, + "SuspendibleThreadSetLock", true); + _async = 0; + _async_stop = false; + _async_stopped = 0; + _initialized = true; + } +} + +void SuspendibleThreadSet::join() { + initialize(); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag); + _async++; + assert(_async > 0, "Huh."); +} + +void SuspendibleThreadSet::leave() { + assert(_initialized, "Must be initialized."); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + _async--; + assert(_async >= 0, "Huh."); + if (_async_stop) _m->notify_all(); +} + +void SuspendibleThreadSet::yield(const char* id) { + assert(_initialized, "Must be initialized."); + if (_async_stop) { + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + if (_async_stop) { + _async_stopped++; + assert(_async_stopped > 0, "Huh."); + if (_async_stopped == _async) { + if (ConcGCYieldTimeout > 0) { + double now = os::elapsedTime(); + guarantee((now - _suspend_all_start) * 1000.0 < + (double)ConcGCYieldTimeout, + "Long delay; whodunit?"); + } + } + _m->notify_all(); + while (_async_stop) _m->wait(Mutex::_no_safepoint_check_flag); + _async_stopped--; + assert(_async >= 0, "Huh"); + _m->notify_all(); + } + } +} + +void SuspendibleThreadSet::suspend_all() { + initialize(); // If necessary. + if (ConcGCYieldTimeout > 0) { + _suspend_all_start = os::elapsedTime(); + } + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + assert(!_async_stop, "Only one at a time."); + _async_stop = true; + while (_async_stopped < _async) _m->wait(Mutex::_no_safepoint_check_flag); +} + +void SuspendibleThreadSet::resume_all() { + assert(_initialized, "Must be initialized."); + MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag); + assert(_async_stopped == _async, "Huh."); + _async_stop = false; + _m->notify_all(); +} + +static void _sltLoop(JavaThread* thread, TRAPS) { + SurrogateLockerThread* slt = (SurrogateLockerThread*)thread; + slt->loop(); +} + +SurrogateLockerThread::SurrogateLockerThread() : + JavaThread(&_sltLoop), + _monitor(Mutex::nonleaf, "SLTMonitor"), + _buffer(empty) +{} + +SurrogateLockerThread* SurrogateLockerThread::make(TRAPS) { + klassOop k = + SystemDictionary::resolve_or_fail(vmSymbolHandles::java_lang_Thread(), + true, CHECK_NULL); + instanceKlassHandle klass (THREAD, k); + instanceHandle thread_oop = klass->allocate_instance_handle(CHECK_NULL); + + const char thread_name[] = "Surrogate Locker Thread (CMS)"; + Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL); + + // Initialize thread_oop to put it into the system threadGroup + Handle thread_group (THREAD, Universe::system_thread_group()); + JavaValue result(T_VOID); + JavaCalls::call_special(&result, thread_oop, + klass, + vmSymbolHandles::object_initializer_name(), + vmSymbolHandles::threadgroup_string_void_signature(), + thread_group, + string, + CHECK_NULL); + + SurrogateLockerThread* res; + { + MutexLocker mu(Threads_lock); + res = new SurrogateLockerThread(); + + // At this point it may be possible that no osthread was created for the + // JavaThread due to lack of memory. We would have to throw an exception + // in that case. However, since this must work and we do not allow + // exceptions anyway, check and abort if this fails. 
+ if (res == NULL || res->osthread() == NULL) { + vm_exit_during_initialization("java.lang.OutOfMemoryError", + "unable to create new native thread"); + } + java_lang_Thread::set_thread(thread_oop(), res); + java_lang_Thread::set_priority(thread_oop(), NearMaxPriority); + java_lang_Thread::set_daemon(thread_oop()); + + res->set_threadObj(thread_oop()); + Threads::add(res); + Thread::start(res); + } + os::yield(); // This seems to help with initial start-up of SLT + return res; +} + +void SurrogateLockerThread::manipulatePLL(SLT_msg_type msg) { + MutexLockerEx x(&_monitor, Mutex::_no_safepoint_check_flag); + assert(_buffer == empty, "Should be empty"); + assert(msg != empty, "empty message"); + _buffer = msg; + while (_buffer != empty) { + _monitor.notify(); + _monitor.wait(Mutex::_no_safepoint_check_flag); + } +} + +// ======= Surrogate Locker Thread ============= + +void SurrogateLockerThread::loop() { + BasicLock pll_basic_lock; + SLT_msg_type msg; + debug_only(unsigned int owned = 0;) + + while (/* !isTerminated() */ 1) { + { + MutexLocker x(&_monitor); + // Since we are a JavaThread, we can't be here at a safepoint. + assert(!SafepointSynchronize::is_at_safepoint(), + "SLT is a JavaThread"); + // wait for msg buffer to become non-empty + while (_buffer == empty) { + _monitor.notify(); + _monitor.wait(); + } + msg = _buffer; + } + switch(msg) { + case acquirePLL: { + instanceRefKlass::acquire_pending_list_lock(&pll_basic_lock); + debug_only(owned++;) + break; + } + case releaseAndNotifyPLL: { + assert(owned > 0, "Don't have PLL"); + instanceRefKlass::release_and_notify_pending_list_lock(&pll_basic_lock); + debug_only(owned--;) + break; + } + case empty: + default: { + guarantee(false,"Unexpected message in _buffer"); + break; + } + } + { + MutexLocker x(&_monitor); + // Since we are a JavaThread, we can't be here at a safepoint. + assert(!SafepointSynchronize::is_at_safepoint(), + "SLT is a JavaThread"); + _buffer = empty; + _monitor.notify(); + } + } + assert(!_monitor.owned_by_self(), "Should unlock before exit."); +} + + +// ===== STS Access From Outside CGCT ===== + +void ConcurrentGCThread::stsYield(const char* id) { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.yield(id); +} + +bool ConcurrentGCThread::stsShouldYield() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + return _sts.should_yield(); +} + +void ConcurrentGCThread::stsJoin() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.join(); +} + +void ConcurrentGCThread::stsLeave() { + assert( Thread::current()->is_ConcurrentGC_thread(), + "only a conc GC thread can call this" ); + _sts.leave(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/concurrentGCThread.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,167 @@ +/* + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class VoidClosure; + +// A SuspendibleThreadSet is (obviously) a set of threads that can be +// suspended. A thread can join and later leave the set, and periodically +// yield. If some thread (not in the set) requests, via suspend_all, that +// the threads be suspended, then the requesting thread is blocked until +// all the threads in the set have yielded or left the set. (Threads may +// not enter the set when an attempted suspension is in progress.) The +// suspending thread later calls resume_all, allowing the suspended threads +// to continue. + +class SuspendibleThreadSet { + Monitor* _m; + int _async; + bool _async_stop; + int _async_stopped; + bool _initialized; + double _suspend_all_start; + + void initialize_work(); + + public: + SuspendibleThreadSet() : _initialized(false) {} + + // Add the current thread to the set. May block if a suspension + // is in progress. + void join(); + // Removes the current thread from the set. + void leave(); + // Returns "true" iff a suspension is in progress. + bool should_yield() { return _async_stop; } + // Suspends the current thread if a suspension is in progress (for + // the duration of the suspension.) + void yield(const char* id); + // Return when all threads in the set are suspended. + void suspend_all(); + // Allow suspended threads to resume. + void resume_all(); + // Redundant initializations okay. + void initialize() { + // Double-check dirty read idiom. + if (!_initialized) initialize_work(); + } +}; + + +class ConcurrentGCThread: public NamedThread { + friend class VMStructs; + +protected: + static bool _should_terminate; + static bool _has_terminated; + + enum CGC_flag_type { + CGC_nil = 0x0, + CGC_dont_suspend = 0x1, + CGC_CGC_safepoint = 0x2, + CGC_VM_safepoint = 0x4 + }; + + static int _CGC_flag; + + static bool CGC_flag_is_set(int b) { return (_CGC_flag & b) != 0; } + static int set_CGC_flag(int b) { return _CGC_flag |= b; } + static int reset_CGC_flag(int b) { return _CGC_flag &= ~b; } + + void stopWorldAndDo(VoidClosure* op); + + // All instances share this one set. + static SuspendibleThreadSet _sts; + + // Create and start the thread (setting its priority high.) + void create_and_start(); + + // Do initialization steps in the thread: record stack base and size, + // init thread local storage, set JNI handle block. + void initialize_in_thread(); + + // Wait until Universe::is_fully_initialized(); + void wait_for_universe_init(); + + // Record that the current thread is terminating, and will do no more + // concurrent work. + void terminate(); + +public: + // Constructor + + ConcurrentGCThread(); + ~ConcurrentGCThread() {} // Exists to call NamedThread destructor.
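+ // Illustrative use of the suspendible thread set from a concurrent + // worker (work_remains() and do_some_work() are hypothetical steps, not + // part of this interface): + // + // stsJoin(); + // while (work_remains()) { + // do_some_work(); + // if (stsShouldYield()) stsYield("my concurrent phase"); + // } + // stsLeave(); + // + // A thread requesting a suspension via _sts.suspend_all() blocks until + // every joined thread has yielded or left; _sts.resume_all() then lets + // the suspended threads continue.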
+ + // Tester + bool is_ConcurrentGC_thread() const { return true; } + + static void safepoint_synchronize(); + static void safepoint_desynchronize(); + + // All overridings should probably do _sts::yield, but we allow + // overriding for distinguished debugging messages. Default is to do + // nothing. + virtual void yield() {} + + bool should_yield() { return _sts.should_yield(); } + + // they are prefixed by sts since there are already yield() and + // should_yield() (non-static) methods in this class and it was an + // easy way to differentiate them. + static void stsYield(const char* id); + static bool stsShouldYield(); + static void stsJoin(); + static void stsLeave(); + +}; + +// The SurrogateLockerThread is used by concurrent GC threads for +// manipulating Java monitors, in particular, currently for +// manipulating the pending_list_lock. XXX +class SurrogateLockerThread: public JavaThread { + friend class VMStructs; + public: + enum SLT_msg_type { + empty = 0, // no message + acquirePLL, // acquire pending list lock + releaseAndNotifyPLL // notify and release pending list lock + }; + private: + // the following are shared with the CMSThread + SLT_msg_type _buffer; // communication buffer + Monitor _monitor; // monitor controlling buffer + BasicLock _basicLock; // used for PLL locking + + public: + static SurrogateLockerThread* make(TRAPS); + + SurrogateLockerThread(); + + bool is_hidden_from_external_view() const { return true; } + + void loop(); // main method + + void manipulatePLL(SLT_msg_type msg); + +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/gcOverheadReporter.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,179 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_gcOverheadReporter.cpp.incl" + +class COReportingThread : public ConcurrentGCThread { +private: + GCOverheadReporter* _reporter; + +public: + COReportingThread(GCOverheadReporter* reporter) : _reporter(reporter) { + guarantee( _reporter != NULL, "precondition" ); + create_and_start(); + } + + virtual void run() { + initialize_in_thread(); + wait_for_universe_init(); + + int period_ms = GCOverheadReportingPeriodMS; + + while ( true ) { + os::sleep(Thread::current(), period_ms, false); + + _sts.join(); + double now_sec = os::elapsedTime(); + _reporter->collect_and_record_conc_overhead(now_sec); + _sts.leave(); + } + + terminate(); + } +}; + +GCOverheadReporter* GCOverheadReporter::_reporter = NULL; + +GCOverheadReporter::GCOverheadReporter(size_t group_num, + const char* group_names[], + size_t length) + : _group_num(group_num), _prev_end_sec(0.0) { + guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum, + "precondition" ); + + _base = NEW_C_HEAP_ARRAY(GCOverheadReporterEntry, length); + _top = _base + length; + _curr = _base; + + for (size_t i = 0; i < group_num; ++i) { + guarantee( group_names[i] != NULL, "precondition" ); + _group_names[i] = group_names[i]; + } +} + +void +GCOverheadReporter::add(double start_sec, double end_sec, + double* conc_overhead, + double stw_overhead) { + assert( _curr <= _top, "invariant" ); + + if (_curr == _top) { + guarantee( false, "trace full" ); + return; + } + + _curr->_start_sec = start_sec; + _curr->_end_sec = end_sec; + for (size_t i = 0; i < _group_num; ++i) { + _curr->_conc_overhead[i] = + (conc_overhead != NULL) ? conc_overhead[i] : 0.0; + } + _curr->_stw_overhead = stw_overhead; + + ++_curr; +} + +void +GCOverheadReporter::collect_and_record_conc_overhead(double end_sec) { + double start_sec = _prev_end_sec; + guarantee( end_sec > start_sec, "invariant" ); + + double conc_overhead[MaxGCOverheadGroupNum]; + COTracker::totalConcOverhead(end_sec, _group_num, conc_overhead); + add_conc_overhead(start_sec, end_sec, conc_overhead); + _prev_end_sec = end_sec; +} + +void +GCOverheadReporter::record_stw_start(double start_sec) { + guarantee( start_sec > _prev_end_sec, "invariant" ); + collect_and_record_conc_overhead(start_sec); +} + +void +GCOverheadReporter::record_stw_end(double end_sec) { + double start_sec = _prev_end_sec; + COTracker::updateAllForSTW(start_sec, end_sec); + add_stw_overhead(start_sec, end_sec, 1.0); + + _prev_end_sec = end_sec; +} + +void +GCOverheadReporter::print() const { + tty->print_cr(""); + tty->print_cr("GC Overhead (%d entries)", _curr - _base); + tty->print_cr(""); + GCOverheadReporterEntry* curr = _base; + while (curr < _curr) { + double total = curr->_stw_overhead; + for (size_t i = 0; i < _group_num; ++i) + total += curr->_conc_overhead[i]; + + tty->print("OVERHEAD %12.8lf %12.8lf ", + curr->_start_sec, curr->_end_sec); + + for (size_t i = 0; i < _group_num; ++i) + tty->print("%s %12.8lf ", _group_names[i], curr->_conc_overhead[i]); + + tty->print_cr("STW %12.8lf TOT %12.8lf", curr->_stw_overhead, total); + ++curr; + } + tty->print_cr(""); +} + +// statics + +void +GCOverheadReporter::initGCOverheadReporter(size_t group_num, + const char* group_names[]) { + guarantee( _reporter == NULL, "should only be called once" ); + guarantee( 0 <= group_num && group_num <= MaxGCOverheadGroupNum, + "precondition" ); + guarantee( group_names != NULL, "pre-condition" ); + + if (GCOverheadReporting) { + _reporter = new GCOverheadReporter(group_num, group_names); 
+ new COReportingThread(_reporter); + } +} + +void +GCOverheadReporter::recordSTWStart(double start_sec) { + if (_reporter != NULL) + _reporter->record_stw_start(start_sec); +} + +void +GCOverheadReporter::recordSTWEnd(double end_sec) { + if (_reporter != NULL) + _reporter->record_stw_end(end_sec); +} + +void +GCOverheadReporter::printGCOverhead() { + if (_reporter != NULL) + _reporter->print(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/gcOverheadReporter.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,141 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// Keeps track of the GC overhead (both concurrent and STW). It stores +// it in a large array and then prints it to tty at the end of the +// execution. + +// See coTracker.hpp for the explanation on what groups are. + +// Let's set a maximum number of concurrent overhead groups, to +// statically allocate any arrays we need and not to have to +// malloc/free them. This is just a bit more convenient. +enum { + MaxGCOverheadGroupNum = 4 +}; + +typedef struct { + double _start_sec; + double _end_sec; + + double _conc_overhead[MaxGCOverheadGroupNum]; + double _stw_overhead; +} GCOverheadReporterEntry; + +class GCOverheadReporter { + friend class COReportingThread; + +private: + enum PrivateConstants { + DefaultReporterLength = 128 * 1024 + }; + + // Reference to the single instance of this class. + static GCOverheadReporter* _reporter; + + // These three fields point to the array that contains the GC + // overhead entries (_base is the base of the array, _top is the + // address past the last entry of the array, _curr is the next + // entry to be used). + GCOverheadReporterEntry* _base; + GCOverheadReporterEntry* _top; + GCOverheadReporterEntry* _curr; + + // The number of concurrent overhead groups. + size_t _group_num; + + // The wall-clock time of the end of the last recorded period of GC + // overhead. + double _prev_end_sec; + + // Names for the concurrent overhead groups. + const char* _group_names[MaxGCOverheadGroupNum]; + + // Add a new entry to the large array. conc_overhead being NULL is + // equivalent to an array full of 0.0s. conc_overhead should have a + // length of at least _group_num. + void add(double start_sec, double end_sec, + double* conc_overhead, + double stw_overhead); + + // Add an entry that represents concurrent GC overhead.
+ // conc_overhead must be at least of length _group_num. + // conc_overhead being NULL is equivalent to an array full of 0.0s. + void add_conc_overhead(double start_sec, double end_sec, + double* conc_overhead) { + add(start_sec, end_sec, conc_overhead, 0.0); + } + + // Add an entry that represents STW GC overhead. + void add_stw_overhead(double start_sec, double end_sec, + double stw_overhead) { + add(start_sec, end_sec, NULL, stw_overhead); + } + + // It records the start of an STW pause (i.e. it records the + // concurrent overhead up to that point). + void record_stw_start(double start_sec); + + // It records the end of an STW pause (i.e. it records the overhead + // associated with the pause and adjusts all the trackers to reflect + // the pause). + void record_stw_end(double end_sec); + + // It queries all the trackers of their concurrent overhead and + // records it. + void collect_and_record_conc_overhead(double end_sec); + + // It prints the contents of the GC overhead array. + void print() const; + + + // Constructor. The same preconditions for group_num and group_names + // from initGCOverheadReporter apply here too. + GCOverheadReporter(size_t group_num, + const char* group_names[], + size_t length = DefaultReporterLength); + +public: + + // statics + + // It initialises the GCOverheadReporter and launches the concurrent + // overhead reporting thread. Both actions happen only if the + // GCOverheadReporting parameter is set. The length of the + // group_names array should be >= group_num and group_num should be + // <= MaxGCOverheadGroupNum. Entries group_names[0..group_num-1] + // should not be NULL. + static void initGCOverheadReporter(size_t group_num, + const char* group_names[]); + + // The following three are provided for convenience and they are + // wrappers around record_stw_start(start_sec), record_stw_end(end_sec), + // and print(). Each of these checks whether GC overhead reporting + // is on (i.e. _reporter != NULL) and, if it is, calls the + // corresponding method. This saves us from repeating this pattern at + // the places where they need to be called. + static void recordSTWStart(double start_sec); + static void recordSTWEnd(double end_sec); + static void printGCOverhead(); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_implementation/shared/vmGCOperations.cpp --- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -74,6 +74,7 @@ // If the GC count has changed someone beat us to the collection // Get the Heap_lock after the pending_list_lock. Heap_lock->lock(); + // Check invocations if (skip_operation()) { // skip collection @@ -82,6 +83,8 @@ _prologue_succeeded = false; } else { _prologue_succeeded = true; + SharedHeap* sh = SharedHeap::heap(); + if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = true; } return _prologue_succeeded; } @@ -90,6 +93,8 @@ void VM_GC_Operation::doit_epilogue() { assert(Thread::current()->is_Java_thread(), "just checking"); // Release the Heap_lock first.
+ SharedHeap* sh = SharedHeap::heap(); + if (sh != NULL) sh->_thread_holds_heap_lock_for_gc = false; Heap_lock->unlock(); release_and_notify_pending_list_lock(); } @@ -148,12 +153,27 @@ void VM_GenCollectForPermanentAllocation::doit() { JvmtiGCForAllocationMarker jgcm; notify_gc_begin(true); - GenCollectedHeap* gch = GenCollectedHeap::heap(); - GCCauseSetter gccs(gch, _gc_cause); - gch->do_full_collection(gch->must_clear_all_soft_refs(), - gch->n_gens() - 1); - _res = gch->perm_gen()->allocate(_size, false); - assert(gch->is_in_reserved_or_null(_res), "result not in heap"); + SharedHeap* heap = (SharedHeap*)Universe::heap(); + GCCauseSetter gccs(heap, _gc_cause); + switch (heap->kind()) { + case (CollectedHeap::GenCollectedHeap): { + GenCollectedHeap* gch = (GenCollectedHeap*)heap; + gch->do_full_collection(gch->must_clear_all_soft_refs(), + gch->n_gens() - 1); + break; + } +#ifndef SERIALGC + case (CollectedHeap::G1CollectedHeap): { + G1CollectedHeap* g1h = (G1CollectedHeap*)heap; + g1h->do_full_collection(_gc_cause == GCCause::_last_ditch_collection); + break; + } +#endif // SERIALGC + default: + ShouldNotReachHere(); + } + _res = heap->perm_gen()->allocate(_size, false); + assert(heap->is_in_reserved_or_null(_res), "result not in heap"); if (_res == NULL && GC_locker::is_active_and_needs_gc()) { set_gc_locked(); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -138,13 +138,6 @@ return new_obj; } -bool CollectedHeap::can_elide_permanent_oop_store_barriers() const { - // %%% This needs refactoring. (It was gating logic from the server compiler.) - guarantee(kind() < CollectedHeap::G1CollectedHeap, ""); - return !UseConcMarkSweepGC; -} - - HeapWord* CollectedHeap::allocate_new_tlab(size_t size) { guarantee(false, "thread-local allocation buffers not supported"); return NULL; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -364,10 +364,8 @@ // Can a compiler initialize a new object without store barriers? // This permission only extends from the creation of a new object // via a TLAB up to the first subsequent safepoint. - virtual bool can_elide_tlab_store_barriers() const { - guarantee(kind() < CollectedHeap::G1CollectedHeap, "else change or refactor this"); - return true; - } + virtual bool can_elide_tlab_store_barriers() const = 0; + // If a compiler is eliding store barriers for TLAB-allocated objects, // there is probably a corresponding slow path which can produce // an object allocated anywhere. The compiler's runtime support @@ -379,12 +377,10 @@ // Can a compiler elide a store barrier when it writes // a permanent oop into the heap? Applies when the compiler // is storing x to the heap, where x->is_perm() is true. - virtual bool can_elide_permanent_oop_store_barriers() const; + virtual bool can_elide_permanent_oop_store_barriers() const = 0; // Does this heap support heap inspection (+PrintClassHistogram?) - virtual bool supports_heap_inspection() const { - return false; // Until RFE 5023697 is implemented - } + virtual bool supports_heap_inspection() const = 0; // Perform a collection of the heap; intended for use in implementing // "System.gc". 
This probably implies as full a collection as the diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/collectedHeap.inline.hpp --- a/src/share/vm/gc_interface/collectedHeap.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -121,7 +121,7 @@ return result; } } - bool gc_overhead_limit_was_exceeded; + bool gc_overhead_limit_was_exceeded = false; result = Universe::heap()->mem_allocate(size, is_noref, false, diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/gc_interface/gcCause.hpp --- a/src/share/vm/gc_interface/gcCause.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/gc_interface/gcCause.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -60,6 +60,8 @@ _old_generation_too_full_to_scavenge, _adaptive_size_policy, + _g1_inc_collection_pause, _g1_pop_region_collection_pause, + _last_ditch_collection, _last_gc_cause }; @@ -68,12 +70,14 @@ return (cause == GCCause::_java_lang_system_gc || cause == GCCause::_jvmti_force_gc); } + inline static bool is_serviceability_requested_gc(GCCause::Cause cause) { return (cause == GCCause::_jvmti_force_gc || cause == GCCause::_heap_inspection || cause == GCCause::_heap_dump); } + // Return a string describing the GCCause. static const char* to_string(GCCause::Cause cause); // Return true if the GCCause is for a full collection. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_compiler1 --- a/src/share/vm/includeDB_compiler1 Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_compiler1 Thu Jun 05 15:57:56 2008 -0700 @@ -36,6 +36,9 @@ c1_CFGPrinter.hpp c1_Compilation.hpp c1_CFGPrinter.hpp c1_Instruction.hpp +cardTableModRefBS.cpp c1_LIR.hpp +cardTableModRefBS.cpp c1_LIRGenerator.hpp + c1_Canonicalizer.cpp c1_Canonicalizer.hpp c1_Canonicalizer.cpp c1_InstructionPrinter.hpp c1_Canonicalizer.cpp ciArray.hpp @@ -55,6 +58,7 @@ c1_CodeStubs_.cpp c1_LIRAssembler.hpp c1_CodeStubs_.cpp c1_MacroAssembler.hpp c1_CodeStubs_.cpp c1_Runtime1.hpp +c1_CodeStubs_.cpp g1SATBCardTableModRefBS.hpp c1_CodeStubs_.cpp nativeInst_.hpp c1_CodeStubs_.cpp sharedRuntime.hpp c1_CodeStubs_.cpp vmreg_.inline.hpp @@ -141,6 +145,7 @@ c1_globals_.hpp globalDefinitions.hpp c1_globals_.hpp macros.hpp +c1_GraphBuilder.cpp bitMap.inline.hpp c1_GraphBuilder.cpp bytecode.hpp c1_GraphBuilder.cpp c1_CFGPrinter.hpp c1_GraphBuilder.cpp c1_Canonicalizer.hpp @@ -158,6 +163,7 @@ c1_GraphBuilder.hpp ciMethodData.hpp c1_GraphBuilder.hpp ciStreams.hpp +c1_IR.cpp bitMap.inline.hpp c1_IR.cpp c1_Compilation.hpp c1_IR.cpp c1_FrameMap.hpp c1_IR.cpp c1_GraphBuilder.hpp @@ -232,33 +238,36 @@ c1_LIRAssembler_.hpp generate_platform_dependent_include -c1_LIRGenerator.cpp c1_Compilation.hpp -c1_LIRGenerator.cpp c1_FrameMap.hpp -c1_LIRGenerator.cpp c1_Instruction.hpp -c1_LIRGenerator.cpp c1_LIRAssembler.hpp -c1_LIRGenerator.cpp c1_LIRGenerator.hpp -c1_LIRGenerator.cpp c1_ValueStack.hpp -c1_LIRGenerator.cpp ciArrayKlass.hpp -c1_LIRGenerator.cpp ciInstance.hpp -c1_LIRGenerator.cpp sharedRuntime.hpp +c1_LIRGenerator.cpp bitMap.inline.hpp +c1_LIRGenerator.cpp c1_Compilation.hpp +c1_LIRGenerator.cpp c1_FrameMap.hpp +c1_LIRGenerator.cpp c1_Instruction.hpp +c1_LIRGenerator.cpp c1_LIRAssembler.hpp +c1_LIRGenerator.cpp c1_LIRGenerator.hpp +c1_LIRGenerator.cpp c1_ValueStack.hpp +c1_LIRGenerator.cpp ciArrayKlass.hpp +c1_LIRGenerator.cpp ciInstance.hpp +c1_LIRGenerator.cpp heapRegion.hpp +c1_LIRGenerator.cpp sharedRuntime.hpp -c1_LIRGenerator.hpp c1_Instruction.hpp -c1_LIRGenerator.hpp c1_LIR.hpp -c1_LIRGenerator.hpp 
ciMethodData.hpp -c1_LIRGenerator.hpp sizes.hpp +c1_LIRGenerator.hpp c1_Instruction.hpp +c1_LIRGenerator.hpp c1_LIR.hpp +c1_LIRGenerator.hpp ciMethodData.hpp +c1_LIRGenerator.hpp sizes.hpp -c1_LIRGenerator_.cpp c1_Compilation.hpp -c1_LIRGenerator_.cpp c1_FrameMap.hpp -c1_LIRGenerator_.cpp c1_Instruction.hpp -c1_LIRGenerator_.cpp c1_LIRAssembler.hpp -c1_LIRGenerator_.cpp c1_LIRGenerator.hpp -c1_LIRGenerator_.cpp c1_Runtime1.hpp -c1_LIRGenerator_.cpp c1_ValueStack.hpp -c1_LIRGenerator_.cpp ciArray.hpp -c1_LIRGenerator_.cpp ciObjArrayKlass.hpp -c1_LIRGenerator_.cpp ciTypeArrayKlass.hpp -c1_LIRGenerator_.cpp sharedRuntime.hpp +c1_LIRGenerator_.cpp c1_Compilation.hpp +c1_LIRGenerator_.cpp c1_FrameMap.hpp +c1_LIRGenerator_.cpp c1_Instruction.hpp +c1_LIRGenerator_.cpp c1_LIRAssembler.hpp +c1_LIRGenerator_.cpp c1_LIRGenerator.hpp +c1_LIRGenerator_.cpp c1_Runtime1.hpp +c1_LIRGenerator_.cpp c1_ValueStack.hpp +c1_LIRGenerator_.cpp ciArray.hpp +c1_LIRGenerator_.cpp ciObjArrayKlass.hpp +c1_LIRGenerator_.cpp ciTypeArrayKlass.hpp +c1_LIRGenerator_.cpp sharedRuntime.hpp +c1_LinearScan.cpp bitMap.inline.hpp c1_LinearScan.cpp c1_CFGPrinter.hpp c1_LinearScan.cpp c1_Compilation.hpp c1_LinearScan.cpp c1_FrameMap.hpp @@ -275,6 +284,7 @@ c1_LinearScan.hpp c1_LIR.hpp c1_LinearScan.hpp c1_LIRGenerator.hpp +c1_LinearScan_.cpp bitMap.inline.hpp c1_LinearScan_.cpp c1_Instruction.hpp c1_LinearScan_.cpp c1_LinearScan.hpp @@ -297,6 +307,7 @@ c1_MacroAssembler_.hpp generate_platform_dependent_include +c1_Optimizer.cpp bitMap.inline.hpp c1_Optimizer.cpp c1_Canonicalizer.hpp c1_Optimizer.cpp c1_Optimizer.hpp c1_Optimizer.cpp c1_ValueMap.hpp @@ -362,6 +373,7 @@ c1_Runtime1_.cpp vframeArray.hpp c1_Runtime1_.cpp vmreg_.inline.hpp +c1_ValueMap.cpp bitMap.inline.hpp c1_ValueMap.cpp c1_Canonicalizer.hpp c1_ValueMap.cpp c1_IR.hpp c1_ValueMap.cpp c1_ValueMap.hpp @@ -432,4 +444,3 @@ top.hpp c1_globals.hpp vmStructs.hpp c1_Runtime1.hpp - diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_compiler2 --- a/src/share/vm/includeDB_compiler2 Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_compiler2 Thu Jun 05 15:57:56 2008 -0700 @@ -461,10 +461,13 @@ graphKit.cpp addnode.hpp graphKit.cpp barrierSet.hpp graphKit.cpp cardTableModRefBS.hpp +graphKit.cpp g1SATBCardTableModRefBS.hpp graphKit.cpp collectedHeap.hpp graphKit.cpp compileLog.hpp graphKit.cpp deoptimization.hpp graphKit.cpp graphKit.hpp +graphKit.cpp heapRegion.hpp +graphKit.cpp idealKit.hpp graphKit.cpp locknode.hpp graphKit.cpp machnode.hpp graphKit.cpp parse.hpp @@ -484,6 +487,7 @@ idealKit.cpp callnode.hpp idealKit.cpp cfgnode.hpp idealKit.cpp idealKit.hpp +idealKit.cpp runtime.hpp idealKit.hpp connode.hpp idealKit.hpp mulnode.hpp @@ -915,9 +919,11 @@ runtime.cpp connode.hpp runtime.cpp copy.hpp runtime.cpp fprofiler.hpp +runtime.cpp g1SATBCardTableModRefBS.hpp runtime.cpp gcLocker.inline.hpp runtime.cpp graphKit.hpp runtime.cpp handles.inline.hpp +runtime.cpp heapRegion.hpp runtime.cpp icBuffer.hpp runtime.cpp interfaceSupport.hpp runtime.cpp interpreter.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_core --- a/src/share/vm/includeDB_core Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_core Thu Jun 05 15:57:56 2008 -0700 @@ -287,6 +287,10 @@ attachListener.hpp debug.hpp attachListener.hpp ostream.hpp +barrierSet.cpp barrierSet.hpp +barrierSet.cpp collectedHeap.hpp +barrierSet.cpp universe.hpp + barrierSet.hpp memRegion.hpp barrierSet.hpp oopsHierarchy.hpp @@ -294,7 +298,7 @@ barrierSet.inline.hpp cardTableModRefBS.hpp 
bcEscapeAnalyzer.cpp bcEscapeAnalyzer.hpp -bcEscapeAnalyzer.cpp bitMap.hpp +bcEscapeAnalyzer.cpp bitMap.inline.hpp bcEscapeAnalyzer.cpp bytecode.hpp bcEscapeAnalyzer.cpp ciConstant.hpp bcEscapeAnalyzer.cpp ciField.hpp @@ -319,13 +323,12 @@ biasedLocking.hpp growableArray.hpp biasedLocking.hpp handles.hpp -bitMap.cpp bitMap.hpp +bitMap.cpp allocation.inline.hpp bitMap.cpp bitMap.inline.hpp bitMap.cpp copy.hpp bitMap.cpp os_.inline.hpp bitMap.hpp allocation.hpp -bitMap.hpp ostream.hpp bitMap.hpp top.hpp bitMap.inline.hpp atomic.hpp @@ -644,6 +647,7 @@ ciMethod.cpp abstractCompiler.hpp ciMethod.cpp allocation.inline.hpp ciMethod.cpp bcEscapeAnalyzer.hpp +ciMethod.cpp bitMap.inline.hpp ciMethod.cpp ciCallProfile.hpp ciMethod.cpp ciExceptionHandler.hpp ciMethod.cpp ciInstanceKlass.hpp @@ -1757,7 +1761,7 @@ genRemSet.hpp oop.hpp -generateOopMap.cpp bitMap.hpp +generateOopMap.cpp bitMap.inline.hpp generateOopMap.cpp bytecodeStream.hpp generateOopMap.cpp generateOopMap.hpp generateOopMap.cpp handles.inline.hpp @@ -1805,6 +1809,8 @@ generation.inline.hpp generation.hpp generation.inline.hpp space.hpp +genOopClosures.hpp oop.hpp + generationSpec.cpp compactPermGen.hpp generationSpec.cpp defNewGeneration.hpp generationSpec.cpp filemap.hpp @@ -2216,6 +2222,11 @@ invocationCounter.hpp exceptions.hpp invocationCounter.hpp handles.hpp +intHisto.cpp intHisto.hpp + +intHisto.hpp allocation.hpp +intHisto.hpp growableArray.hpp + iterator.cpp iterator.hpp iterator.cpp oop.inline.hpp @@ -2815,6 +2826,7 @@ methodKlass.hpp methodOop.hpp methodLiveness.cpp allocation.inline.hpp +methodLiveness.cpp bitMap.inline.hpp methodLiveness.cpp bytecode.hpp methodLiveness.cpp bytecodes.hpp methodLiveness.cpp ciMethod.hpp @@ -2961,6 +2973,11 @@ nmethod.hpp codeBlob.hpp nmethod.hpp pcDesc.hpp +numberSeq.cpp debug.hpp +numberSeq.cpp numberSeq.hpp +numberSeq.cpp globalDefinitions.hpp +numberSeq.cpp allocation.inline.hpp + objArrayKlass.cpp collectedHeap.inline.hpp objArrayKlass.cpp copy.hpp objArrayKlass.cpp genOopClosures.inline.hpp @@ -3403,8 +3420,6 @@ referencePolicy.cpp referencePolicy.hpp referencePolicy.cpp universe.hpp -referencePolicy.hpp oop.hpp - referenceProcessor.cpp collectedHeap.hpp referenceProcessor.cpp collectedHeap.inline.hpp referenceProcessor.cpp java.hpp @@ -3746,6 +3761,8 @@ specialized_oop_closures.cpp ostream.hpp specialized_oop_closures.cpp specialized_oop_closures.hpp +specialized_oop_closures.hpp atomic.hpp + stackMapFrame.cpp globalDefinitions.hpp stackMapFrame.cpp handles.inline.hpp stackMapFrame.cpp oop.inline.hpp @@ -3988,7 +4005,6 @@ taskqueue.hpp allocation.hpp taskqueue.hpp allocation.inline.hpp -taskqueue.hpp debug.hpp taskqueue.hpp mutex.hpp taskqueue.hpp orderAccess_.inline.hpp @@ -4026,6 +4042,7 @@ templateInterpreterGenerator_.hpp generate_platform_dependent_include +templateTable.cpp collectedHeap.hpp templateTable.cpp templateTable.hpp templateTable.cpp timer.hpp @@ -4530,6 +4547,7 @@ vm_operations.cpp compilerOracle.hpp vm_operations.cpp deoptimization.hpp vm_operations.cpp interfaceSupport.hpp +vm_operations.cpp isGCActiveMark.hpp vm_operations.cpp resourceArea.hpp vm_operations.cpp threadService.hpp vm_operations.cpp thread_.inline.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_gc_parallel --- a/src/share/vm/includeDB_gc_parallel Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_gc_parallel Thu Jun 05 15:57:56 2008 -0700 @@ -21,6 +21,10 @@ // have any questions. 
// +assembler_.cpp g1SATBCardTableModRefBS.hpp +assembler_.cpp g1CollectedHeap.inline.hpp +assembler_.cpp heapRegion.hpp + collectorPolicy.cpp cmsAdaptiveSizePolicy.hpp collectorPolicy.cpp cmsGCAdaptivePolicyCounters.hpp @@ -37,6 +41,9 @@ heapInspection.cpp parallelScavengeHeap.hpp +instanceKlass.cpp heapRegionSeq.inline.hpp +instanceKlass.cpp g1CollectedHeap.inline.hpp +instanceKlass.cpp g1OopClosures.inline.hpp instanceKlass.cpp oop.pcgc.inline.hpp instanceKlass.cpp psPromotionManager.inline.hpp instanceKlass.cpp psScavenge.inline.hpp @@ -48,6 +55,9 @@ instanceKlassKlass.cpp psScavenge.inline.hpp instanceKlassKlass.cpp parOopClosures.inline.hpp +instanceRefKlass.cpp heapRegionSeq.inline.hpp +instanceRefKlass.cpp g1CollectedHeap.inline.hpp +instanceRefKlass.cpp g1OopClosures.inline.hpp instanceRefKlass.cpp oop.pcgc.inline.hpp instanceRefKlass.cpp psPromotionManager.inline.hpp instanceRefKlass.cpp psScavenge.inline.hpp @@ -70,6 +80,7 @@ memoryService.cpp cmsPermGen.hpp memoryService.cpp concurrentMarkSweepGeneration.hpp +memoryService.cpp g1CollectedHeap.inline.hpp memoryService.cpp parNewGeneration.hpp memoryService.cpp parallelScavengeHeap.hpp memoryService.cpp psMemoryPool.hpp @@ -80,6 +91,9 @@ methodDataKlass.cpp oop.pcgc.inline.hpp methodDataKlass.cpp psScavenge.inline.hpp +objArrayKlass.cpp heapRegionSeq.inline.hpp +objArrayKlass.cpp g1CollectedHeap.inline.hpp +objArrayKlass.cpp g1OopClosures.inline.hpp objArrayKlass.cpp oop.pcgc.inline.hpp objArrayKlass.cpp psPromotionManager.inline.hpp objArrayKlass.cpp psScavenge.inline.hpp @@ -122,6 +136,9 @@ thread.cpp concurrentMarkSweepThread.hpp thread.cpp pcTasks.hpp +thread.hpp dirtyCardQueue.hpp +thread.hpp satbQueue.hpp + universe.cpp parallelScavengeHeap.hpp universe.cpp cmsCollectorPolicy.hpp universe.cpp cmsAdaptiveSizePolicy.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/includeDB_jvmti --- a/src/share/vm/includeDB_jvmti Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/includeDB_jvmti Thu Jun 05 15:57:56 2008 -0700 @@ -209,6 +209,7 @@ jvmtiManageCapabilities.hpp allocation.hpp jvmtiManageCapabilities.hpp jvmti.h +jvmtiRedefineClasses.cpp bitMap.inline.hpp jvmtiRedefineClasses.cpp codeCache.hpp jvmtiRedefineClasses.cpp deoptimization.hpp jvmtiRedefineClasses.cpp gcLocker.hpp diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/interpreter/templateTable.cpp --- a/src/share/vm/interpreter/templateTable.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/interpreter/templateTable.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -172,6 +172,7 @@ Template* TemplateTable::_desc; InterpreterMacroAssembler* TemplateTable::_masm; +BarrierSet* TemplateTable::_bs; void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(), char filler) { @@ -244,6 +245,8 @@ // Initialize table TraceTime timer("TemplateTable initialization", TraceStartupTime); + _bs = Universe::heap()->barrier_set(); + // For better readability const char _ = ' '; const int ____ = 0; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/interpreter/templateTable.hpp --- a/src/share/vm/interpreter/templateTable.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/interpreter/templateTable.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -82,6 +82,7 @@ static Template* _desc; // the current template to be generated static Bytecodes::Code bytecode() { return _desc->bytecode(); } + static BarrierSet* _bs; // Cache the barrier set. 
public: //%note templates_1 static InterpreterMacroAssembler* _masm; // the assembler used when generating templates diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/allocation.hpp --- a/src/share/vm/memory/allocation.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/allocation.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -338,6 +338,12 @@ DEBUG_ONLY(((ResourceObj *)res)->_allocation = RESOURCE_AREA;) return res; } + void* operator new(size_t size, void* where, allocation_type type) { + void* res = where; + // Set allocation type in the resource object + DEBUG_ONLY(((ResourceObj *)res)->_allocation = type;) + return res; + } void operator delete(void* p); }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/barrierSet.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/memory/barrierSet.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,36 @@ +/* + * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_barrierSet.cpp.incl" + +// count is in HeapWord's +void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) { + Universe::heap()->barrier_set()->write_ref_array_pre(MemRegion(start, start + count)); +} + +// count is in HeapWord's +void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) { + Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, start + count)); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/barrierSet.hpp --- a/src/share/vm/memory/barrierSet.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/barrierSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -32,6 +32,8 @@ ModRef, CardTableModRef, CardTableExtension, + G1SATBCT, + G1SATBCTLogging, Other, Uninit }; @@ -42,14 +44,16 @@ public: + BarrierSet() { _kind = Uninit; } // To get around prohibition on RTTI. - virtual BarrierSet::Name kind() { return _kind; } + BarrierSet::Name kind() { return _kind; } virtual bool is_a(BarrierSet::Name bsn) = 0; // These operations indicate what kind of barriers the BarrierSet has. 
virtual bool has_read_ref_barrier() = 0; virtual bool has_read_prim_barrier() = 0; virtual bool has_write_ref_barrier() = 0; + virtual bool has_write_ref_pre_barrier() = 0; virtual bool has_write_prim_barrier() = 0; // These functions indicate whether a particular access of the given @@ -57,7 +61,8 @@ virtual bool read_ref_needs_barrier(void* field) = 0; virtual bool read_prim_needs_barrier(HeapWord* field, size_t bytes) = 0; virtual bool write_ref_needs_barrier(void* field, oop new_val) = 0; - virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, juint val1, juint val2) = 0; + virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes, + juint val1, juint val2) = 0; // The first four operations provide a direct implementation of the // barrier set. An interpreter loop, for example, could call these @@ -75,6 +80,13 @@ // (For efficiency reasons, this operation is specialized for certain // barrier types. Semantically, it should be thought of as a call to the // virtual "_work" function below, which must implement the barrier.) + // First the pre-write versions... + inline void write_ref_field_pre(void* field, oop new_val); +protected: + virtual void write_ref_field_pre_work(void* field, oop new_val) {}; +public: + + // ...then the post-write version. inline void write_ref_field(void* field, oop new_val); protected: virtual void write_ref_field_work(void* field, oop new_val) = 0; @@ -92,6 +104,7 @@ // the particular barrier. virtual bool has_read_ref_array_opt() = 0; virtual bool has_read_prim_array_opt() = 0; + virtual bool has_write_ref_array_pre_opt() { return true; } virtual bool has_write_ref_array_opt() = 0; virtual bool has_write_prim_array_opt() = 0; @@ -104,7 +117,13 @@ virtual void read_ref_array(MemRegion mr) = 0; virtual void read_prim_array(MemRegion mr) = 0; + virtual void write_ref_array_pre(MemRegion mr) {} inline void write_ref_array(MemRegion mr); + + // Static versions, suitable for calling from generated code. + static void static_write_ref_array_pre(HeapWord* start, size_t count); + static void static_write_ref_array_post(HeapWord* start, size_t count); + protected: virtual void write_ref_array_work(MemRegion mr) = 0; public: @@ -120,33 +139,6 @@ virtual void write_region_work(MemRegion mr) = 0; public: - // The remaining sets of operations are called by compilers or other code - // generators to insert barriers into generated code. There may be - // several such code generators; the signatures of these - // barrier-generating functions may differ from generator to generator. - // There will be a set of four function signatures for each code - // generator, which accomplish the generation of barriers of the four - // kinds listed above. - -#ifdef TBD - // Generates code to invoke the barrier, if any, necessary when reading - // the ref field at "offset" in "obj". - virtual void gen_read_ref_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when reading - // the primitive field of "bytes" bytes at offset" in "obj". - virtual void gen_read_prim_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when writing - // "new_val" into the ref field at "offset" in "obj". - virtual void gen_write_ref_field() = 0; - - // Generates code to invoke the barrier, if any, necessary when writing - // the "bytes"-byte value "new_val" into the primitive field at "offset" - // in "obj". 
- virtual void gen_write_prim_field() = 0; -#endif - // Some barrier sets create tables whose elements correspond to parts of // the heap; the CardTableModRefBS is an example. Such barrier sets will // normally reserve space for such tables, and commit parts of the table diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/barrierSet.inline.hpp --- a/src/share/vm/memory/barrierSet.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/barrierSet.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -26,6 +26,14 @@ // performance-critical calls when when the barrier is the most common // card-table kind. +void BarrierSet::write_ref_field_pre(void* field, oop new_val) { + if (kind() == CardTableModRef) { + ((CardTableModRefBS*)this)->inline_write_ref_field_pre(field, new_val); + } else { + write_ref_field_pre_work(field, new_val); + } +} + void BarrierSet::write_ref_field(void* field, oop new_val) { if (kind() == CardTableModRef) { ((CardTableModRefBS*)this)->inline_write_ref_field(field, new_val); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/blockOffsetTable.cpp --- a/src/share/vm/memory/blockOffsetTable.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/blockOffsetTable.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -184,7 +184,7 @@ "Offset card has an unexpected value"); size_t start_card_for_region = start_card; u_char offset = max_jubyte; - for (int i = 0; i <= N_powers-1; i++) { + for (int i = 0; i < N_powers; i++) { // -1 so that the the card with the actual offset is counted. Another -1 // so that the reach ends in this region and not at the start // of the next. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/blockOffsetTable.hpp --- a/src/share/vm/memory/blockOffsetTable.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/blockOffsetTable.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -208,6 +208,7 @@ ////////////////////////////////////////////////////////////////////////// class BlockOffsetArray: public BlockOffsetTable { friend class VMStructs; + friend class G1BlockOffsetArray; // temp. until we restructure and cleanup protected: // The following enums are used by do_block_internal() below enum Action { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableModRefBS.cpp --- a/src/share/vm/memory/cardTableModRefBS.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -299,6 +299,17 @@ } +bool CardTableModRefBS::claim_card(size_t card_index) { + jbyte val = _byte_map[card_index]; + if (val != claimed_card_val()) { + jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val); + if (res == val) + return true; + else return false; + } + return false; +} + void CardTableModRefBS::non_clean_card_iterate(Space* sp, MemRegion mr, DirtyCardToOopClosure* dcto_cl, @@ -398,7 +409,7 @@ } } -void CardTableModRefBS::invalidate(MemRegion mr) { +void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (!mri.is_empty()) dirty_MemRegion(mri); @@ -426,11 +437,15 @@ } } +void CardTableModRefBS::dirty(MemRegion mr) { + jbyte* first = byte_for(mr.start()); + jbyte* last = byte_after(mr.last()); + memset(first, dirty_card, last-first); +} + // NOTES: // (1) Unlike mod_oop_in_space_iterate() above, dirty_card_iterate() // iterates over dirty cards ranges in increasing address order. 
-// (2) Unlike, e.g., dirty_card_range_after_preclean() below, -// this method does not make the dirty cards prelceaned. void CardTableModRefBS::dirty_card_iterate(MemRegion mr, MemRegionClosure* cl) { for (int i = 0; i < _cur_covered_regions; i++) { @@ -456,7 +471,9 @@ } } -MemRegion CardTableModRefBS::dirty_card_range_after_preclean(MemRegion mr) { +MemRegion CardTableModRefBS::dirty_card_range_after_reset(MemRegion mr, + bool reset, + int reset_val) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (!mri.is_empty()) { @@ -473,8 +490,10 @@ dirty_cards++, next_entry++); MemRegion cur_cards(addr_for(cur_entry), dirty_cards*card_size_in_words); - for (size_t i = 0; i < dirty_cards; i++) { - cur_entry[i] = precleaned_card; + if (reset) { + for (size_t i = 0; i < dirty_cards; i++) { + cur_entry[i] = reset_val; + } } return cur_cards; } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableModRefBS.hpp --- a/src/share/vm/memory/cardTableModRefBS.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -54,6 +54,7 @@ clean_card = -1, dirty_card = 0, precleaned_card = 1, + claimed_card = 3, last_card = 4, CT_MR_BS_last_reserved = 10 }; @@ -150,17 +151,6 @@ return byte_for(p) + 1; } - // Mapping from card marking array entry to address of first word - HeapWord* addr_for(const jbyte* p) const { - assert(p >= _byte_map && p < _byte_map + _byte_map_size, - "out of bounds access to card marking array"); - size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte)); - HeapWord* result = (HeapWord*) (delta << card_shift); - assert(_whole_heap.contains(result), - "out of bounds accessor from card marking array"); - return result; - } - // Iterate over the portion of the card-table which covers the given // region mr in the given space and apply cl to any dirty sub-regions // of mr. cl and dcto_cl must either be the same closure or cl must @@ -263,16 +253,22 @@ card_size_in_words = card_size / sizeof(HeapWord) }; + static int clean_card_val() { return clean_card; } + static int dirty_card_val() { return dirty_card; } + static int claimed_card_val() { return claimed_card; } + static int precleaned_card_val() { return precleaned_card; } + // For RTTI simulation. - BarrierSet::Name kind() { return BarrierSet::CardTableModRef; } bool is_a(BarrierSet::Name bsn) { - return bsn == BarrierSet::CardTableModRef || bsn == BarrierSet::ModRef; + return bsn == BarrierSet::CardTableModRef || ModRefBarrierSet::is_a(bsn); } CardTableModRefBS(MemRegion whole_heap, int max_covered_regions); // *** Barrier set functions. + bool has_write_ref_pre_barrier() { return false; } + inline bool write_ref_needs_barrier(void* field, oop new_val) { // Note that this assumes the perm gen is the highest generation // in the address space @@ -315,11 +311,33 @@ // *** Card-table-barrier-specific things. + inline void inline_write_ref_field_pre(void* field, oop newVal) {} + inline void inline_write_ref_field(void* field, oop newVal) { jbyte* byte = byte_for(field); *byte = dirty_card; } + // These are used by G1, when it uses the card table as a temporary data + // structure for card claiming. 
+ bool is_card_dirty(size_t card_index) { + return _byte_map[card_index] == dirty_card_val(); + } + + void mark_card_dirty(size_t card_index) { + _byte_map[card_index] = dirty_card_val(); + } + + bool is_card_claimed(size_t card_index) { + return _byte_map[card_index] == claimed_card_val(); + } + + bool claim_card(size_t card_index); + + bool is_card_clean(size_t card_index) { + return _byte_map[card_index] == clean_card_val(); + } + // Card marking array base (adjusted for heap low boundary) // This would be the 0th element of _byte_map, if the heap started at 0x0. // But since the heap starts at some higher address, this points to somewhere @@ -344,8 +362,9 @@ } // ModRefBS functions. - void invalidate(MemRegion mr); + virtual void invalidate(MemRegion mr, bool whole_heap = false); void clear(MemRegion mr); + void dirty(MemRegion mr); void mod_oop_in_space_iterate(Space* sp, OopClosure* cl, bool clear = false, bool before_save_marks = false); @@ -375,18 +394,39 @@ static uintx ct_max_alignment_constraint(); - // Apply closure cl to the dirty cards lying completely - // within MemRegion mr, setting the cards to precleaned. - void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl); + // Apply closure "cl" to the dirty cards containing some part of + // MemRegion "mr". + void dirty_card_iterate(MemRegion mr, MemRegionClosure* cl); // Return the MemRegion corresponding to the first maximal run - // of dirty cards lying completely within MemRegion mr, after - // marking those cards precleaned. - MemRegion dirty_card_range_after_preclean(MemRegion mr); + // of dirty cards lying completely within MemRegion mr. + // If reset is "true", then sets those card table entries to the given + // value. + MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset, + int reset_val); // Set all the dirty cards in the given region to precleaned state. void preclean_dirty_cards(MemRegion mr); + // Provide read-only access to the card table array. + const jbyte* byte_for_const(const void* p) const { + return byte_for(p); + } + const jbyte* byte_after_const(const void* p) const { + return byte_after(p); + } + + // Mapping from card marking array entry to address of first word + HeapWord* addr_for(const jbyte* p) const { + assert(p >= _byte_map && p < _byte_map + _byte_map_size, + "out of bounds access to card marking array"); + size_t delta = pointer_delta(p, byte_map_base, sizeof(jbyte)); + HeapWord* result = (HeapWord*) (delta << card_shift); + assert(_whole_heap.contains(result), + "out of bounds accessor from card marking array"); + return result; + } + // Mapping from address to card marking array index. 
int index_for(void* p) { assert(_whole_heap.contains(p), @@ -402,6 +442,7 @@ static size_t par_chunk_heapword_alignment() { return CardsPerStrideChunk * card_size_in_words; } + }; class CardTableRS; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableRS.cpp --- a/src/share/vm/memory/cardTableRS.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableRS.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -27,10 +27,25 @@ CardTableRS::CardTableRS(MemRegion whole_heap, int max_covered_regions) : - GenRemSet(&_ct_bs), - _ct_bs(whole_heap, max_covered_regions), - _cur_youngergen_card_val(youngergenP1_card) + GenRemSet(), + _cur_youngergen_card_val(youngergenP1_card), + _regions_to_iterate(max_covered_regions - 1) { +#ifndef SERIALGC + if (UseG1GC) { + if (G1RSBarrierUseQueue) { + _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap, + max_covered_regions); + } else { + _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions); + } + } else { + _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); + } +#else + _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); +#endif + set_bs(_ct_bs); _last_cur_val_in_gen = new jbyte[GenCollectedHeap::max_gens + 1]; if (_last_cur_val_in_gen == NULL) { vm_exit_during_initialization("Could not last_cur_val_in_gen array."); @@ -38,20 +53,19 @@ for (int i = 0; i < GenCollectedHeap::max_gens + 1; i++) { _last_cur_val_in_gen[i] = clean_card_val(); } - _ct_bs.set_CTRS(this); + _ct_bs->set_CTRS(this); } void CardTableRS::resize_covered_region(MemRegion new_region) { - _ct_bs.resize_covered_region(new_region); + _ct_bs->resize_covered_region(new_region); } jbyte CardTableRS::find_unused_youngergenP_card_value() { - GenCollectedHeap* gch = GenCollectedHeap::heap(); for (jbyte v = youngergenP1_card; v < cur_youngergen_and_prev_nonclean_card; v++) { bool seen = false; - for (int g = 0; g < gch->n_gens()+1; g++) { + for (int g = 0; g < _regions_to_iterate; g++) { if (_last_cur_val_in_gen[g] == v) { seen = true; break; @@ -221,11 +235,11 @@ void CardTableRS::younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl) { - DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs.precision(), + DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, _ct_bs->precision(), cl->gen_boundary()); ClearNoncleanCardWrapper clear_cl(dcto_cl, this); - _ct_bs.non_clean_card_iterate(sp, sp->used_region_at_save_marks(), + _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(), dcto_cl, &clear_cl, false); } @@ -549,7 +563,7 @@ if (ch->kind() == CollectedHeap::GenCollectedHeap) { GenCollectedHeap::heap()->generation_iterate(&blk, false); - _ct_bs.verify(); + _ct_bs->verify(); // If the old gen collections also collect perm, then we are only // interested in perm-to-young pointers, not perm-to-old pointers. 
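For context on the barrierSet.hpp and cardTableModRefBS changes above: the reference write barrier is now split into a pre-write and a post-write step, so a snapshot-at-the-beginning collector such as G1 can observe the value about to be overwritten, while card-table collectors keep their usual post-store card dirtying. A minimal sketch of the intended calling pattern around a reference store follows; the helper name example_oop_field_store is hypothetical, and only the two BarrierSet entry points are taken from the declarations above.

// Sketch only -- not part of this changeset. Shows how a runtime oop store
// would be expected to drive the new pre/post write barrier pair.
inline void example_oop_field_store(void* field, oop new_val) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  bs->write_ref_field_pre(field, new_val);  // pre-barrier: a SATB collector may
                                            // record the value being overwritten;
                                            // CardTableModRefBS leaves its
                                            // inline_write_ref_field_pre empty.
  *(oop*)field = new_val;                   // the actual reference store
                                            // (ignores compressed-oop encoding
                                            // for brevity)
  bs->write_ref_field(field, new_val);      // post-barrier: e.g. dirty the card
                                            // covering "field"
}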
diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/cardTableRS.hpp --- a/src/share/vm/memory/cardTableRS.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/cardTableRS.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -44,7 +44,7 @@ return CardTableModRefBS::card_is_dirty_wrt_gen_iter(cv); } - CardTableModRefBSForCTRS _ct_bs; + CardTableModRefBSForCTRS* _ct_bs; virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl); @@ -73,6 +73,8 @@ jbyte _cur_youngergen_card_val; + int _regions_to_iterate; + jbyte cur_youngergen_card_val() { return _cur_youngergen_card_val; } @@ -96,7 +98,7 @@ CardTableRS* as_CardTableRS() { return this; } - CardTableModRefBS* ct_bs() { return &_ct_bs; } + CardTableModRefBS* ct_bs() { return _ct_bs; } // Override. void prepare_for_younger_refs_iterate(bool parallel); @@ -107,7 +109,7 @@ void younger_refs_iterate(Generation* g, OopsInGenClosure* blk); void inline_write_ref_field_gc(void* field, oop new_val) { - jbyte* byte = _ct_bs.byte_for(field); + jbyte* byte = _ct_bs->byte_for(field); *byte = youngergen_card; } void write_ref_field_gc_work(void* field, oop new_val) { @@ -122,25 +124,27 @@ void resize_covered_region(MemRegion new_region); bool is_aligned(HeapWord* addr) { - return _ct_bs.is_card_aligned(addr); + return _ct_bs->is_card_aligned(addr); } void verify(); void verify_aligned_region_empty(MemRegion mr); - void clear(MemRegion mr) { _ct_bs.clear(mr); } + void clear(MemRegion mr) { _ct_bs->clear(mr); } void clear_into_younger(Generation* gen, bool clear_perm); - void invalidate(MemRegion mr) { _ct_bs.invalidate(mr); } + void invalidate(MemRegion mr, bool whole_heap = false) { + _ct_bs->invalidate(mr, whole_heap); + } void invalidate_or_clear(Generation* gen, bool younger, bool perm); static uintx ct_max_alignment_constraint() { return CardTableModRefBS::ct_max_alignment_constraint(); } - jbyte* byte_for(void* p) { return _ct_bs.byte_for(p); } - jbyte* byte_after(void* p) { return _ct_bs.byte_after(p); } - HeapWord* addr_for(jbyte* p) { return _ct_bs.addr_for(p); } + jbyte* byte_for(void* p) { return _ct_bs->byte_for(p); } + jbyte* byte_after(void* p) { return _ct_bs->byte_after(p); } + HeapWord* addr_for(jbyte* p) { return _ct_bs->addr_for(p); } bool is_prev_nonclean_card_val(jbyte v) { return diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/collectorPolicy.cpp --- a/src/share/vm/memory/collectorPolicy.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/collectorPolicy.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -31,11 +31,11 @@ if (PermSize > MaxPermSize) { MaxPermSize = PermSize; } - PermSize = align_size_down(PermSize, min_alignment()); + PermSize = MAX2(min_alignment(), align_size_down_(PermSize, min_alignment())); MaxPermSize = align_size_up(MaxPermSize, max_alignment()); - MinPermHeapExpansion = align_size_down(MinPermHeapExpansion, min_alignment()); - MaxPermHeapExpansion = align_size_down(MaxPermHeapExpansion, min_alignment()); + MinPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MinPermHeapExpansion, min_alignment())); + MaxPermHeapExpansion = MAX2(min_alignment(), align_size_down_(MaxPermHeapExpansion, min_alignment())); MinHeapDeltaBytes = align_size_up(MinHeapDeltaBytes, min_alignment()); @@ -55,25 +55,21 @@ void CollectorPolicy::initialize_size_info() { // User inputs from -mx and ms are aligned - _initial_heap_byte_size = align_size_up(Arguments::initial_heap_size(), - min_alignment()); - set_min_heap_byte_size(align_size_up(Arguments::min_heap_size(), - min_alignment())); - 
set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment())); - - // Check validity of heap parameters from launcher + set_initial_heap_byte_size(Arguments::initial_heap_size()); if (initial_heap_byte_size() == 0) { set_initial_heap_byte_size(NewSize + OldSize); - } else { - Universe::check_alignment(initial_heap_byte_size(), min_alignment(), - "initial heap"); } + set_initial_heap_byte_size(align_size_up(_initial_heap_byte_size, + min_alignment())); + + set_min_heap_byte_size(Arguments::min_heap_size()); if (min_heap_byte_size() == 0) { set_min_heap_byte_size(NewSize + OldSize); - } else { - Universe::check_alignment(min_heap_byte_size(), min_alignment(), - "initial heap"); } + set_min_heap_byte_size(align_size_up(_min_heap_byte_size, + min_alignment())); + + set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment())); // Check heap parameter properties if (initial_heap_byte_size() < M) { @@ -121,8 +117,6 @@ int max_covered_regions) { switch (rem_set_name()) { case GenRemSet::CardTable: { - if (barrier_set_name() != BarrierSet::CardTableModRef) - vm_exit_during_initialization("Mismatch between RS and BS."); CardTableRS* res = new CardTableRS(whole_heap, max_covered_regions); return res; } @@ -345,7 +339,7 @@ // At this point all three sizes have been checked against the // maximum sizes but have not been checked for consistency - // amoung the three. + // among the three. // Final check min <= initial <= max set_min_gen0_size(MIN2(_min_gen0_size, _max_gen0_size)); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/collectorPolicy.hpp --- a/src/share/vm/memory/collectorPolicy.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/collectorPolicy.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -39,10 +39,12 @@ // Forward declarations. class GenCollectorPolicy; class TwoGenerationCollectorPolicy; +class AdaptiveSizePolicy; #ifndef SERIALGC class ConcurrentMarkSweepPolicy; +class G1CollectorPolicy; #endif // SERIALGC -class AdaptiveSizePolicy; + class GCPolicyCounters; class PermanentGenerationSpec; class MarkSweepPolicy; @@ -55,7 +57,7 @@ // Requires that the concrete subclass sets the alignment constraints // before calling. virtual void initialize_flags(); - virtual void initialize_size_info() = 0; + virtual void initialize_size_info(); // Initialize "_permanent_generation" to a spec for the given kind of // Perm Gen. void initialize_perm_generation(PermGen::Name pgnm); @@ -91,17 +93,18 @@ enum Name { CollectorPolicyKind, TwoGenerationCollectorPolicyKind, - TrainPolicyKind, ConcurrentMarkSweepPolicyKind, - ASConcurrentMarkSweepPolicyKind + ASConcurrentMarkSweepPolicyKind, + G1CollectorPolicyKind }; // Identification methods. - virtual GenCollectorPolicy* as_generation_policy() { return NULL; } + virtual GenCollectorPolicy* as_generation_policy() { return NULL; } virtual TwoGenerationCollectorPolicy* as_two_generation_policy() { return NULL; } virtual MarkSweepPolicy* as_mark_sweep_policy() { return NULL; } #ifndef SERIALGC virtual ConcurrentMarkSweepPolicy* as_concurrent_mark_sweep_policy() { return NULL; } + virtual G1CollectorPolicy* as_g1_policy() { return NULL; } #endif // SERIALGC // Note that these are not virtual. 
bool is_generation_policy() { return as_generation_policy() != NULL; } @@ -109,10 +112,13 @@ bool is_mark_sweep_policy() { return as_mark_sweep_policy() != NULL; } #ifndef SERIALGC bool is_concurrent_mark_sweep_policy() { return as_concurrent_mark_sweep_policy() != NULL; } + bool is_g1_policy() { return as_g1_policy() != NULL; } #else // SERIALGC bool is_concurrent_mark_sweep_policy() { return false; } + bool is_g1_policy() { return false; } #endif // SERIALGC + virtual PermanentGenerationSpec *permanent_generation() { assert(_permanent_generation != NULL, "Sanity check"); return _permanent_generation; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/defNewGeneration.cpp --- a/src/share/vm/memory/defNewGeneration.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/defNewGeneration.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -214,20 +214,26 @@ MemRegion fromMR((HeapWord*)from_start, (HeapWord*)to_start); MemRegion toMR ((HeapWord*)to_start, (HeapWord*)to_end); - eden()->initialize(edenMR, (minimum_eden_size == 0)); - // If minumum_eden_size != 0, we will not have cleared any - // portion of eden above its top. This can cause newly - // expanded space not to be mangled if using ZapUnusedHeapArea. - // We explicitly do such mangling here. - if (ZapUnusedHeapArea && (minimum_eden_size != 0)) { - eden()->mangle_unused_area(); + eden()->set_bounds(edenMR); + if (minimum_eden_size == 0) { + // The "minimum_eden_size" is really the amount of eden occupied by + // allocated objects -- if this is zero, then we can clear the space. + eden()->clear(); + } else { + // Otherwise, we will not have cleared eden. This can cause newly + // expanded space not to be mangled if using ZapUnusedHeapArea. + // We explicitly do such mangling here. + if (ZapUnusedHeapArea) { + eden()->mangle_unused_area(); + } } - from()->initialize(fromMR, true); - to()->initialize(toMR , true); - eden()->set_next_compaction_space(from()); + from()->set_bounds(fromMR); from()->clear(); + to()->set_bounds(toMR); to()->clear(); + // Make sure we compact eden, then from. // The to-space is normally empty before a compaction so need // not be considered. The exception is during promotion // failure handling when to-space can contain live objects. + eden()->set_next_compaction_space(from()); from()->set_next_compaction_space(NULL); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genCollectedHeap.hpp --- a/src/share/vm/memory/genCollectedHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genCollectedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -252,6 +252,21 @@ virtual size_t unsafe_max_tlab_alloc(Thread* thr) const; virtual HeapWord* allocate_new_tlab(size_t size); + // Can a compiler initialize a new object without store barriers? + // This permission only extends from the creation of a new object + // via a TLAB up to the first subsequent safepoint. + virtual bool can_elide_tlab_store_barriers() const { + return true; + } + + // Can a compiler elide a store barrier when it writes + // a permanent oop into the heap? Applies when the compiler + // is storing x to the heap, where x->is_perm() is true. + virtual bool can_elide_permanent_oop_store_barriers() const { + // CMS needs to see all, even intra-generational, ref updates. + return !UseConcMarkSweepGC; + } + // The "requestor" generation is performing some garbage collection // action for which it would be useful to have scratch space. 
The // requestor promises to allocate no more than "max_alloc_words" in any diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genMarkSweep.hpp --- a/src/share/vm/memory/genMarkSweep.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genMarkSweep.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -24,6 +24,7 @@ class GenMarkSweep : public MarkSweep { friend class VM_MarkSweep; + friend class G1MarkSweep; public: static void invoke_at_safepoint(int level, ReferenceProcessor* rp, bool clear_all_softrefs); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genOopClosures.hpp --- a/src/share/vm/memory/genOopClosures.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genOopClosures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -56,6 +56,9 @@ // pointers must call the method below. template void do_barrier(T* p); + // Version for use by closures that may be called in parallel code. + void par_do_barrier(oop* p); + public: OopsInGenClosure() : OopClosure(NULL), _orig_gen(NULL), _gen(NULL), _gen_boundary(NULL), _rs(NULL) {}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genOopClosures.inline.hpp --- a/src/share/vm/memory/genOopClosures.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genOopClosures.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -48,6 +48,16 @@ } } +inline void OopsInGenClosure::par_do_barrier(oop* p) { + assert(generation()->is_in_reserved(p), "expected ref in generation"); + oop obj = *p; + assert(obj != NULL, "expected non-null object"); + // If p points to a younger generation, mark the card. + if ((HeapWord*)obj < gen_boundary()) { + rs()->write_ref_field_gc_par(p, obj); + } +} + // NOTE! Any changes made here should also be made // in FastScanClosure::do_oop_work() template inline void ScanClosure::do_oop_work(T* p) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/genRemSet.hpp --- a/src/share/vm/memory/genRemSet.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/genRemSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -42,6 +42,7 @@ }; GenRemSet(BarrierSet * bs) : _bs(bs) {} + GenRemSet() : _bs(NULL) {} virtual Name rs_kind() = 0; @@ -53,6 +54,9 @@ // Return the barrier set associated with "this." BarrierSet* bs() { return _bs; } + // Set the barrier set. + void set_bs(BarrierSet* bs) { _bs = bs; } + // Do any (sequential) processing necessary to prepare for (possibly // "parallel", if that arg is true) calls to younger_refs_iterate. virtual void prepare_for_younger_refs_iterate(bool parallel) = 0; @@ -116,7 +120,10 @@ // Informs the RS that refs in the given "mr" may have changed // arbitrarily, and therefore may contain old-to-young pointers. - virtual void invalidate(MemRegion mr) = 0; + // If "whole heap" is true, then this invalidation is part of an + // invalidation of the whole heap, which an implementation might + // handle differently than that of a sub-part of the heap. 
+ virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0; // Informs the RS that refs in this generation // may have changed arbitrarily, and therefore may contain diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/heapInspection.cpp --- a/src/share/vm/memory/heapInspection.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/heapInspection.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -238,11 +238,14 @@ HeapWord* ref; CollectedHeap* heap = Universe::heap(); + bool is_shared_heap = false; switch (heap->kind()) { + case CollectedHeap::G1CollectedHeap: case CollectedHeap::GenCollectedHeap: { - GenCollectedHeap* gch = (GenCollectedHeap*)heap; - gch->gc_prologue(false /* !full */); // get any necessary locks - ref = gch->perm_gen()->used_region().start(); + is_shared_heap = true; + SharedHeap* sh = (SharedHeap*)heap; + sh->gc_prologue(false /* !full */); // get any necessary locks, etc. + ref = sh->perm_gen()->used_region().start(); break; } #ifndef SERIALGC @@ -284,9 +287,9 @@ } st->flush(); - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - gch->gc_epilogue(false /* !full */); // release all acquired locks + if (is_shared_heap) { + SharedHeap* sh = (SharedHeap*)heap; + sh->gc_epilogue(false /* !full */); // release all acquired locks, etc. } } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/iterator.hpp --- a/src/share/vm/memory/iterator.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/iterator.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -26,9 +26,23 @@ class ReferenceProcessor; +// Closure provides abortability. + +class Closure : public StackObj { + protected: + bool _abort; + void set_abort() { _abort = true; } + public: + Closure() : _abort(false) {} + // A subtype can use this mechanism to indicate to some iterator mapping + // functions that the iteration should cease. + bool abort() { return _abort; } + void clear_abort() { _abort = false; } +}; + // OopClosure is used for iterating through roots (oop*) -class OopClosure : public StackObj { +class OopClosure : public Closure { public: ReferenceProcessor* _ref_processor; OopClosure(ReferenceProcessor* rp) : _ref_processor(rp) { } @@ -55,11 +69,16 @@ Prefetch::style prefetch_style() { // Note that this is non-virtual. return Prefetch::do_none; } + + // True iff this closure may be safely applied more than once to an oop + // location without an intervening "major reset" (like the end of a GC). + virtual bool idempotent() { return false; } + virtual bool apply_to_weak_ref_discovered_field() { return false; } }; // ObjectClosure is used for iterating through an object space -class ObjectClosure : public StackObj { +class ObjectClosure : public Closure { public: // Called for each object. virtual void do_object(oop obj) = 0; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/modRefBarrierSet.hpp --- a/src/share/vm/memory/modRefBarrierSet.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/modRefBarrierSet.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -31,6 +31,13 @@ class ModRefBarrierSet: public BarrierSet { public: + + ModRefBarrierSet() { _kind = BarrierSet::ModRef; } + + bool is_a(BarrierSet::Name bsn) { + return bsn == BarrierSet::ModRef; + } + // Barriers only on ref writes. bool has_read_ref_barrier() { return false; } bool has_read_prim_barrier() { return false; } @@ -85,8 +92,10 @@ bool clear = false, bool before_save_marks = false) = 0; - // Causes all refs in "mr" to be assumed to be modified. 
- virtual void invalidate(MemRegion mr) = 0; + // Causes all refs in "mr" to be assumed to be modified. If "whole_heap" + // is true, the caller asserts that the entire heap is being invalidated, + // which may admit an optimized implementation for some barriers. + virtual void invalidate(MemRegion mr, bool whole_heap = false) = 0; // The caller guarantees that "mr" contains no references. (Perhaps it's // objects have been moved elsewhere.) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/referenceProcessor.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -91,7 +91,8 @@ bool mt_discovery, BoolObjectClosure* is_alive_non_header, int parallel_gc_threads, - bool mt_processing) { + bool mt_processing, + bool dl_needs_barrier) { int mt_degree = 1; if (parallel_gc_threads > 1) { mt_degree = parallel_gc_threads; @@ -99,7 +100,8 @@ ReferenceProcessor* rp = new ReferenceProcessor(span, atomic_discovery, mt_discovery, mt_degree, - mt_processing && (parallel_gc_threads > 0)); + mt_processing && (parallel_gc_threads > 0), + dl_needs_barrier); if (rp == NULL) { vm_exit_during_initialization("Could not allocate ReferenceProcessor object"); } @@ -111,10 +113,13 @@ bool atomic_discovery, bool mt_discovery, int mt_degree, - bool mt_processing) : + bool mt_processing, + bool discovered_list_needs_barrier) : _discovering_refs(false), _enqueuing_is_done(false), _is_alive_non_header(NULL), + _discovered_list_needs_barrier(discovered_list_needs_barrier), + _bs(NULL), _processing_is_mt(mt_processing), _next_id(0) { @@ -135,6 +140,10 @@ _discoveredSoftRefs[i].set_head(sentinel_ref()); _discoveredSoftRefs[i].set_length(0); } + // If we do barreirs, cache a copy of the barrier set. + if (discovered_list_needs_barrier) { + _bs = Universe::heap()->barrier_set(); + } } #ifndef PRODUCT @@ -727,10 +736,15 @@ refs_list.set_length(0); } -void -ReferenceProcessor::abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]) { - for (int i = 0; i < _num_q; i++) { - abandon_partial_discovered_list(refs_lists[i]); +void ReferenceProcessor::abandon_partial_discovery() { + // loop over the lists + for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { + gclog_or_tty->print_cr( + "\nAbandoning %s discovered list", + list_name(i)); + } + abandon_partial_discovered_list(_discoveredSoftRefs[i]); } } @@ -994,7 +1008,16 @@ assert(_discovery_is_mt, "!_discovery_is_mt should have been handled by caller"); // First we must make sure this object is only enqueued once. CAS in a non null // discovered_addr. - oop retest = oopDesc::atomic_compare_exchange_oop(refs_list.head(), discovered_addr, + oop current_head = refs_list.head(); + + // Note: In the case of G1, this pre-barrier is strictly + // not necessary because the only case we are interested in + // here is when *discovered_addr is NULL, so this will expand to + // nothing. As a result, I am just manually eliding this out for G1. + if (_discovered_list_needs_barrier && !UseG1GC) { + _bs->write_ref_field_pre((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR"); + } + oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr, NULL); if (retest == NULL) { // This thread just won the right to enqueue the object. @@ -1002,6 +1025,10 @@ // is necessary. 
refs_list.set_head(obj); refs_list.set_length(refs_list.length() + 1); + if (_discovered_list_needs_barrier) { + _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR"); + } + } else { // If retest was non NULL, another thread beat us to it: // The reference has already been discovered... @@ -1073,8 +1100,8 @@ } } - HeapWord* discovered_addr = java_lang_ref_Reference::discovered_addr(obj); - oop discovered = java_lang_ref_Reference::discovered(obj); + HeapWord* const discovered_addr = java_lang_ref_Reference::discovered_addr(obj); + const oop discovered = java_lang_ref_Reference::discovered(obj); assert(discovered->is_oop_or_null(), "bad discovered field"); if (discovered != NULL) { // The reference has already been discovered... @@ -1094,7 +1121,7 @@ // discovered twice except by concurrent collectors that potentially // trace the same Reference object twice. assert(UseConcMarkSweepGC, - "Only possible with a concurrent collector"); + "Only possible with an incremental-update concurrent collector"); return true; } } @@ -1122,12 +1149,24 @@ return false; // nothing special needs to be done } - // We do a raw store here, the field will be visited later when - // processing the discovered references. if (_discovery_is_mt) { add_to_discovered_list_mt(*list, obj, discovered_addr); } else { - oop_store_raw(discovered_addr, list->head()); + // If "_discovered_list_needs_barrier", we do write barriers when + // updating the discovered reference list. Otherwise, we do a raw store + // here: the field will be visited later when processing the discovered + // references. + oop current_head = list->head(); + // As in the case further above, since we are over-writing a NULL + // pre-value, we can safely elide the pre-barrier here for the case of G1. + assert(discovered == NULL, "control point invariant"); + if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1 + _bs->write_ref_field_pre((oop*)discovered_addr, current_head); + } + oop_store_raw(discovered_addr, current_head); + if (_discovered_list_needs_barrier) { + _bs->write_ref_field((oop*)discovered_addr, current_head); + } list->set_head(obj); list->set_length(list->length() + 1); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/referenceProcessor.hpp --- a/src/share/vm/memory/referenceProcessor.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/referenceProcessor.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -54,6 +54,14 @@ bool _discovery_is_atomic; // if discovery is atomic wrt // other collectors in configuration bool _discovery_is_mt; // true if reference discovery is MT. + // If true, setting "next" field of a discovered refs list requires + // write barrier(s). (Must be true if used in a collector in which + // elements of a discovered list may be moved during discovery: for + // example, a collector like Garbage-First that moves objects during a + // long-term concurrent marking phase that does weak reference + // discovery.) + bool _discovered_list_needs_barrier; + BarrierSet* _bs; // Cached copy of BarrierSet. bool _enqueuing_is_done; // true if all weak references enqueued bool _processing_is_mt; // true during phases when // reference processing is MT. @@ -196,7 +204,6 @@ void verify_ok_to_handle_reflists() PRODUCT_RETURN; void abandon_partial_discovered_list(DiscoveredList& refs_list); - void abandon_partial_discovered_list_arr(DiscoveredList refs_lists[]); // Calculate the number of jni handles. 
unsigned int count_jni_refs(); @@ -217,6 +224,8 @@ _discovery_is_atomic(true), _enqueuing_is_done(false), _discovery_is_mt(false), + _discovered_list_needs_barrier(false), + _bs(NULL), _is_alive_non_header(NULL), _num_q(0), _processing_is_mt(false), @@ -224,8 +233,10 @@ {} ReferenceProcessor(MemRegion span, bool atomic_discovery, - bool mt_discovery, int mt_degree = 1, - bool mt_processing = false); + bool mt_discovery, + int mt_degree = 1, + bool mt_processing = false, + bool discovered_list_needs_barrier = false); // Allocates and initializes a reference processor. static ReferenceProcessor* create_ref_processor( @@ -234,8 +245,8 @@ bool mt_discovery, BoolObjectClosure* is_alive_non_header = NULL, int parallel_gc_threads = 1, - bool mt_processing = false); - + bool mt_processing = false, + bool discovered_list_needs_barrier = false); // RefDiscoveryPolicy values enum { ReferenceBasedDiscovery = 0, @@ -296,6 +307,11 @@ // Enqueue references at end of GC (called by the garbage collector) bool enqueue_discovered_references(AbstractRefProcTaskExecutor* task_executor = NULL); + // If a discovery is in process that is being superceded, abandon it: all + // the discovered lists will be empty, and all the objects on them will + // have NULL discovered fields. Must be called only at a safepoint. + void abandon_partial_discovery(); + // debugging void verify_no_references_recorded() PRODUCT_RETURN; static void verify(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/sharedHeap.cpp --- a/src/share/vm/memory/sharedHeap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/sharedHeap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -57,15 +57,24 @@ } _sh = this; // ch is static, should be set only once. if ((UseParNewGC || - (UseConcMarkSweepGC && CMSParallelRemarkEnabled)) && + (UseConcMarkSweepGC && CMSParallelRemarkEnabled) || + UseG1GC) && ParallelGCThreads > 0) { - _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, true); + _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, + /* are_GC_task_threads */true, + /* are_ConcurrentGC_threads */false); if (_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); } } } +bool SharedHeap::heap_lock_held_for_gc() { + Thread* t = Thread::current(); + return Heap_lock->owned_by_self() + || ( (t->is_GC_task_thread() || t->is_VM_thread()) + && _thread_holds_heap_lock_for_gc); +} void SharedHeap::set_par_threads(int t) { _n_par_threads = t; @@ -280,10 +289,11 @@ } // Some utilities. -void SharedHeap::print_size_transition(size_t bytes_before, +void SharedHeap::print_size_transition(outputStream* out, + size_t bytes_before, size_t bytes_after, size_t capacity) { - tty->print(" %d%s->%d%s(%d%s)", + out->print(" %d%s->%d%s(%d%s)", byte_size_in_proper_unit(bytes_before), proper_unit_for_byte_size(bytes_before), byte_size_in_proper_unit(bytes_after), diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/sharedHeap.hpp --- a/src/share/vm/memory/sharedHeap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/sharedHeap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -44,6 +44,9 @@ class SharedHeap : public CollectedHeap { friend class VMStructs; + friend class VM_GC_Operation; + friend class VM_CGC_Operation; + private: // For claiming strong_roots tasks. SubTasksDone* _process_strong_tasks; @@ -82,6 +85,14 @@ // function. 
SharedHeap(CollectorPolicy* policy_); + // Returns true if the calling thread holds the heap lock, + // or the calling thread is a par gc thread and the heap_lock is held + // by the vm thread doing a gc operation. + bool heap_lock_held_for_gc(); + // True if the heap_lock is held by the a non-gc thread invoking a gc + // operation. + bool _thread_holds_heap_lock_for_gc; + public: static SharedHeap* heap() { return _sh; } @@ -97,8 +108,8 @@ void set_perm(PermGen* perm_gen) { _perm_gen = perm_gen; } - // A helper function that fills an allocated-but-not-yet-initialized - // region with a garbage object. + // A helper function that fills a region of the heap with + // with a single object. static void fill_region_with_object(MemRegion mr); // Minimum garbage fill object size @@ -214,13 +225,12 @@ // "SharedHeap" can use in the implementation of its virtual // functions. -protected: +public: // Do anything common to GC's. virtual void gc_prologue(bool full) = 0; virtual void gc_epilogue(bool full) = 0; -public: // // New methods from CollectedHeap // @@ -266,7 +276,8 @@ } // Some utilities. - void print_size_transition(size_t bytes_before, + void print_size_transition(outputStream* out, + size_t bytes_before, size_t bytes_after, size_t capacity); }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/space.cpp --- a/src/share/vm/memory/space.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/space.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -105,7 +105,7 @@ "Only ones we deal with for now."); assert(_precision != CardTableModRefBS::ObjHeadPreciseArray || - _last_bottom == NULL || + _cl->idempotent() || _last_bottom == NULL || top <= _last_bottom, "Not decreasing"); NOT_PRODUCT(_last_bottom = mr.start()); @@ -144,7 +144,14 @@ walk_mem_region(mr, bottom_obj, top); } - _min_done = bottom; + // An idempotent closure might be applied in any order, so we don't + // record a _min_done for it. + if (!_cl->idempotent()) { + _min_done = bottom; + } else { + assert(_min_done == _last_explicit_min_done, + "Don't update _min_done for idempotent cl"); + } } DirtyCardToOopClosure* Space::new_dcto_cl(OopClosure* cl, @@ -232,13 +239,17 @@ return new ContiguousSpaceDCTOC(this, cl, precision, boundary); } -void Space::initialize(MemRegion mr, bool clear_space) { +void Space::set_bounds(MemRegion mr) { HeapWord* bottom = mr.start(); HeapWord* end = mr.end(); assert(Universe::on_page_boundary(bottom) && Universe::on_page_boundary(end), "invalid space boundaries"); set_bottom(bottom); set_end(end); +} + +void Space::initialize(MemRegion mr, bool clear_space) { + set_bounds(mr); if (clear_space) clear(); } @@ -246,20 +257,35 @@ if (ZapUnusedHeapArea) mangle_unused_area(); } -void ContiguousSpace::initialize(MemRegion mr, bool clear_space) -{ - CompactibleSpace::initialize(mr, clear_space); - _concurrent_iteration_safe_limit = top(); +void CompactibleSpace::initialize(MemRegion mr, bool clear_space) { + Space::initialize(mr, false); // We'll do the clearing if there's + // clearing to be done. + _compaction_top = bottom(); + _next_compaction_space = NULL; + if (clear_space) clear(); +} + +void CompactibleSpace::clear() { + _compaction_top = bottom(); + Space::clear(); +} + +void ContiguousSpace::initialize(MemRegion mr, bool clear_space) { + CompactibleSpace::initialize(mr, false); // We'll do the clearing if there's + // clearing to be done. 
+ set_top(bottom()); + set_saved_mark(); + if (clear_space) clear(); } void ContiguousSpace::clear() { set_top(bottom()); set_saved_mark(); - Space::clear(); + CompactibleSpace::clear(); } bool Space::is_in(const void* p) const { - HeapWord* b = block_start(p); + HeapWord* b = block_start_const(p); return b != NULL && block_is_obj(b); } @@ -271,8 +297,17 @@ return p >= _top; } +void OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space) { + // false ==> we'll do the clearing if there's clearing to be done. + ContiguousSpace::initialize(mr, false); + _offsets.zero_bottom_entry(); + _offsets.initialize_threshold(); + if (clear_space) clear(); +} + void OffsetTableContigSpace::clear() { ContiguousSpace::clear(); + _offsets.zero_bottom_entry(); _offsets.initialize_threshold(); } @@ -297,12 +332,6 @@ debug_only(Copy::fill_to_words(mr.start(), mr.word_size(), badHeapWord)); } -void CompactibleSpace::initialize(MemRegion mr, bool clear_space) { - Space::initialize(mr, clear_space); - _compaction_top = bottom(); - _next_compaction_space = NULL; -} - HeapWord* CompactibleSpace::forward(oop q, size_t size, CompactPoint* cp, HeapWord* compact_top) { // q is alive @@ -477,8 +506,8 @@ } guarantee(p == top(), "end of last object must match end of space"); if (top() != end()) { - guarantee(top() == block_start(end()-1) && - top() == block_start(top()), + guarantee(top() == block_start_const(end()-1) && + top() == block_start_const(top()), "top should be start of unallocated block, if it exists"); } } @@ -710,7 +739,7 @@ #undef ContigSpace_OOP_SINCE_SAVE_MARKS_DEFN // Very general, slow implementation. -HeapWord* ContiguousSpace::block_start(const void* p) const { +HeapWord* ContiguousSpace::block_start_const(const void* p) const { assert(MemRegion(bottom(), end()).contains(p), "p not in space"); if (p >= top()) { return top(); @@ -913,7 +942,8 @@ // For a sampling of objects in the space, find it using the // block offset table. if (blocks == BLOCK_SAMPLE_INTERVAL) { - guarantee(p == block_start(p + (size/2)), "check offset computation"); + guarantee(p == block_start_const(p + (size/2)), + "check offset computation"); blocks = 0; } else { blocks++; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/space.hpp --- a/src/share/vm/memory/space.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/space.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -105,7 +105,7 @@ virtual void set_bottom(HeapWord* value) { _bottom = value; } virtual void set_end(HeapWord* value) { _end = value; } - HeapWord* saved_mark_word() const { return _saved_mark_word; } + virtual HeapWord* saved_mark_word() const { return _saved_mark_word; } void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; } MemRegionClosure* preconsumptionDirtyCardClosure() const { @@ -131,8 +131,18 @@ return MemRegion(bottom(), saved_mark_word()); } - // Initialization + // Initialization. + // "initialize" should be called once on a space, before it is used for + // any purpose. The "mr" arguments gives the bounds of the space, and + // the "clear_space" argument should be true unless the memory in "mr" is + // known to be zeroed. virtual void initialize(MemRegion mr, bool clear_space); + + // Sets the bounds (bottom and end) of the current space to those of "mr." + void set_bounds(MemRegion mr); + + // The "clear" method must be called on a region that may have + // had allocation performed in it, but is now to be considered empty. virtual void clear(); // For detecting GC bugs. 
Should only be called at GC boundaries, since @@ -216,7 +226,13 @@ // "block" that contains "p". We say "block" instead of "object" since // some heaps may not pack objects densely; a chunk may either be an // object or a non-object. If "p" is not in the space, return NULL. - virtual HeapWord* block_start(const void* p) const = 0; + virtual HeapWord* block_start_const(const void* p) const = 0; + + // The non-const version may have benevolent side effects on the data + // structure supporting these calls, possibly speeding up future calls. + // The default implementation, however, is simply to call the const + // version. + inline virtual HeapWord* block_start(const void* p); // Requires "addr" to be the start of a chunk, and returns its size. // "addr + size" is required to be the start of a new chunk, or the end @@ -282,12 +298,13 @@ CardTableModRefBS::PrecisionStyle _precision; HeapWord* _boundary; // If non-NULL, process only non-NULL oops // pointing below boundary. - HeapWord* _min_done; // ObjHeadPreciseArray precision requires + HeapWord* _min_done; // ObjHeadPreciseArray precision requires // a downwards traversal; this is the // lowest location already done (or, // alternatively, the lowest address that // shouldn't be done again. NULL means infinity.) NOT_PRODUCT(HeapWord* _last_bottom;) + NOT_PRODUCT(HeapWord* _last_explicit_min_done;) // Get the actual top of the area on which the closure will // operate, given where the top is assumed to be (the end of the @@ -311,13 +328,15 @@ HeapWord* boundary) : _sp(sp), _cl(cl), _precision(precision), _boundary(boundary), _min_done(NULL) { - NOT_PRODUCT(_last_bottom = NULL;) + NOT_PRODUCT(_last_bottom = NULL); + NOT_PRODUCT(_last_explicit_min_done = NULL); } void do_MemRegion(MemRegion mr); void set_min_done(HeapWord* min_done) { _min_done = min_done; + NOT_PRODUCT(_last_explicit_min_done = _min_done); } #ifndef PRODUCT void set_last_bottom(HeapWord* last_bottom) { @@ -355,6 +374,7 @@ public: virtual void initialize(MemRegion mr, bool clear_space); + virtual void clear(); // Used temporarily during a compaction phase to hold the value // top should have when compaction is complete. @@ -511,7 +531,7 @@ /* prefetch beyond q */ \ Prefetch::write(q, interval); \ /* size_t size = oop(q)->size(); changing this for cms for perm gen */\ - size_t size = block_size(q); \ + size_t size = block_size(q); \ compact_top = cp->space->forward(oop(q), size, cp, compact_top); \ q += size; \ end_of_live = q; \ @@ -575,68 +595,68 @@ cp->space->set_compaction_top(compact_top); \ } -#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) { \ - /* adjust all the interior pointers to point at the new locations of objects \ - * Used by MarkSweep::mark_sweep_phase3() */ \ +#define SCAN_AND_ADJUST_POINTERS(adjust_obj_size) { \ + /* adjust all the interior pointers to point at the new locations of objects \ + * Used by MarkSweep::mark_sweep_phase3() */ \ \ - HeapWord* q = bottom(); \ - HeapWord* t = _end_of_live; /* Established by "prepare_for_compaction". */ \ + HeapWord* q = bottom(); \ + HeapWord* t = _end_of_live; /* Established by "prepare_for_compaction". */ \ \ - assert(_first_dead <= _end_of_live, "Stands to reason, no?"); \ + assert(_first_dead <= _end_of_live, "Stands to reason, no?"); \ \ - if (q < t && _first_dead > q && \ + if (q < t && _first_dead > q && \ !oop(q)->is_gc_marked()) { \ /* we have a chunk of the space which hasn't moved and we've \ * reinitialized the mark word during the previous pass, so we can't \ - * use is_gc_marked for the traversal. 
*/ \ + * use is_gc_marked for the traversal. */ \ HeapWord* end = _first_dead; \ \ - while (q < end) { \ - /* I originally tried to conjoin "block_start(q) == q" to the \ - * assertion below, but that doesn't work, because you can't \ - * accurately traverse previous objects to get to the current one \ - * after their pointers (including pointers into permGen) have been \ - * updated, until the actual compaction is done. dld, 4/00 */ \ - assert(block_is_obj(q), \ - "should be at block boundaries, and should be looking at objs"); \ + while (q < end) { \ + /* I originally tried to conjoin "block_start(q) == q" to the \ + * assertion below, but that doesn't work, because you can't \ + * accurately traverse previous objects to get to the current one \ + * after their pointers (including pointers into permGen) have been \ + * updated, until the actual compaction is done. dld, 4/00 */ \ + assert(block_is_obj(q), \ + "should be at block boundaries, and should be looking at objs"); \ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q))); \ \ - /* point all the oops to the new location */ \ - size_t size = oop(q)->adjust_pointers(); \ - size = adjust_obj_size(size); \ + /* point all the oops to the new location */ \ + size_t size = oop(q)->adjust_pointers(); \ + size = adjust_obj_size(size); \ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ - \ + \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size)); \ - \ + \ q += size; \ - } \ + } \ \ - if (_first_dead == t) { \ - q = t; \ - } else { \ - /* $$$ This is funky. Using this to read the previously written \ - * LiveRange. See also use below. */ \ + if (_first_dead == t) { \ + q = t; \ + } else { \ + /* $$$ This is funky. Using this to read the previously written \ + * LiveRange. See also use below. */ \ q = (HeapWord*)oop(_first_dead)->mark()->decode_pointer(); \ - } \ - } \ + } \ + } \ \ const intx interval = PrefetchScanIntervalInBytes; \ \ - debug_only(HeapWord* prev_q = NULL); \ - while (q < t) { \ - /* prefetch beyond q */ \ + debug_only(HeapWord* prev_q = NULL); \ + while (q < t) { \ + /* prefetch beyond q */ \ Prefetch::write(q, interval); \ - if (oop(q)->is_gc_marked()) { \ - /* q is alive */ \ + if (oop(q)->is_gc_marked()) { \ + /* q is alive */ \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::track_interior_pointers(oop(q))); \ - /* point all the oops to the new location */ \ - size_t size = oop(q)->adjust_pointers(); \ - size = adjust_obj_size(size); \ - VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ + /* point all the oops to the new location */ \ + size_t size = oop(q)->adjust_pointers(); \ + size = adjust_obj_size(size); \ + VALIDATE_MARK_SWEEP_ONLY(MarkSweep::check_interior_pointers()); \ VALIDATE_MARK_SWEEP_ONLY(MarkSweep::validate_live_oop(oop(q), size)); \ - debug_only(prev_q = q); \ + debug_only(prev_q = q); \ q += size; \ } else { \ /* q is not a live object, so its mark should point at the next \ @@ -716,6 +736,8 @@ } \ } \ \ + /* Let's remember if we were empty before we did the compaction. */ \ + bool was_empty = used_region().is_empty(); \ /* Reset space after compaction is complete */ \ reset_after_compaction(); \ /* We do this clear, below, since it has overloaded meanings for some */ \ @@ -723,8 +745,8 @@ /* compacted into will have had their offset table thresholds updated */ \ /* continuously, but those that weren't need to have their thresholds */ \ /* re-initialized. Also mangles unused area for debugging. 
*/ \ - if (is_empty()) { \ - clear(); \ + if (used_region().is_empty()) { \ + if (!was_empty) clear(); \ } else { \ if (ZapUnusedHeapArea) mangle_unused_area(); \ } \ @@ -750,8 +772,8 @@ HeapWord* top() const { return _top; } void set_top(HeapWord* value) { _top = value; } - void set_saved_mark() { _saved_mark_word = top(); } - void reset_saved_mark() { _saved_mark_word = bottom(); } + virtual void set_saved_mark() { _saved_mark_word = top(); } + void reset_saved_mark() { _saved_mark_word = bottom(); } virtual void clear(); @@ -843,7 +865,7 @@ virtual void object_iterate_from(WaterMark mark, ObjectClosure* blk); // Very inefficient implementation. - virtual HeapWord* block_start(const void* p) const; + virtual HeapWord* block_start_const(const void* p) const; size_t block_size(const HeapWord* p) const; // If a block is in the allocated area, it is an object. bool block_is_obj(const HeapWord* p) const { return p < top(); } @@ -1000,9 +1022,10 @@ void set_bottom(HeapWord* value); void set_end(HeapWord* value); + virtual void initialize(MemRegion mr, bool clear_space); void clear(); - inline HeapWord* block_start(const void* p) const; + inline HeapWord* block_start_const(const void* p) const; // Add offset table update. virtual inline HeapWord* allocate(size_t word_size); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/space.inline.hpp --- a/src/share/vm/memory/space.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/space.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -22,6 +22,10 @@ * */ +inline HeapWord* Space::block_start(const void* p) { + return block_start_const(p); +} + inline HeapWord* OffsetTableContigSpace::allocate(size_t size) { HeapWord* res = ContiguousSpace::allocate(size); if (res != NULL) { @@ -50,7 +54,8 @@ return res; } -inline HeapWord* OffsetTableContigSpace::block_start(const void* p) const { +inline HeapWord* +OffsetTableContigSpace::block_start_const(const void* p) const { return _offsets.block_start(p); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/specialized_oop_closures.hpp --- a/src/share/vm/memory/specialized_oop_closures.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/specialized_oop_closures.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -59,6 +59,12 @@ // This is split into several because of a Visual C++ 6.0 compiler bug // where very long macros cause the compiler to crash +// Some other heap might define further specialized closures. +#ifndef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES +#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \ + /* None */ +#endif + #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_S(f) \ f(ScanClosure,_nv) \ f(FastScanClosure,_nv) \ @@ -77,7 +83,7 @@ SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_P(f) #ifndef SERIALGC -#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) \ +#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) \ f(MarkRefsIntoAndScanClosure,_nv) \ f(Par_MarkRefsIntoAndScanClosure,_nv) \ f(PushAndMarkClosure,_nv) \ @@ -85,11 +91,13 @@ f(PushOrMarkClosure,_nv) \ f(Par_PushOrMarkClosure,_nv) \ f(CMSKeepAliveClosure,_nv) \ - f(CMSInnerParMarkAndPushClosure,_nv) + f(CMSInnerParMarkAndPushClosure,_nv) \ + FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) #else // SERIALGC -#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) +#define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) #endif // SERIALGC + // We separate these out, because sometime the general one has // a different definition from the specialized ones, and sometimes it // doesn't. 
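For context on the specialized_oop_closures.hpp changes above: the renamed _CLOSURES_2 list now ends with the FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES hook, which defaults to empty, so another collector can append its own closure types without editing the shared list. A rough sketch of how a collector-specific header might use the hook; the closure names below are hypothetical placeholders for whatever non-virtual closures that collector defines.

// Sketch only -- not part of this changeset. A collector-specific header,
// processed before the default above is seen, could pre-define the hook:
class MyCollectorScanClosure;      // hypothetical closure with do_oop_nv()
class MyCollectorParScanClosure;   // hypothetical parallel variant

#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
  f(MyCollectorScanClosure,_nv)                         \
  f(MyCollectorParScanClosure,_nv)

// With the hook defined, SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) -- and so
// ALL_OOP_OOP_ITERATE_CLOSURES_2(f) -- also expands over these types, so the
// oop_oop_iterate##nv_suffix (and the new oop_oop_iterate_backwards##nv_suffix)
// bodies are generated for them in instanceKlass.cpp and instanceRefKlass.cpp.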
@@ -98,8 +106,8 @@ f(OopClosure,_v) \ SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(f) -#define ALL_OOP_OOP_ITERATE_CLOSURES_3(f) \ - SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(f) +#define ALL_OOP_OOP_ITERATE_CLOSURES_2(f) \ + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(f) #ifndef SERIALGC // This macro applies an argument macro to all OopClosures for which we @@ -125,6 +133,13 @@ // The "root_class" is the most general class to define; this may be // "OopClosure" in some applications and "OopsInGenClosure" in others. + +// Some other heap might define further specialized closures. +#ifndef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES +#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) \ + /* None */ +#endif + #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_S(f) \ f(ScanClosure,_nv) \ f(FastScanClosure,_nv) @@ -132,7 +147,8 @@ #ifndef SERIALGC #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) \ f(ParScanWithBarrierClosure,_nv) \ - f(ParScanWithoutBarrierClosure,_nv) + f(ParScanWithoutBarrierClosure,_nv) \ + FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f) #else // SERIALGC #define SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES_YOUNG_P(f) #endif // SERIALGC @@ -179,13 +195,15 @@ #if ENABLE_SPECIALIZATION_STATS private: - static int _numCallsAll; + static bool _init; + static bool _wrapped; + static jint _numCallsAll; - static int _numCallsTotal[NUM_Kinds]; - static int _numCalls_nv[NUM_Kinds]; + static jint _numCallsTotal[NUM_Kinds]; + static jint _numCalls_nv[NUM_Kinds]; - static int _numDoOopCallsTotal[NUM_Kinds]; - static int _numDoOopCalls_nv[NUM_Kinds]; + static jint _numDoOopCallsTotal[NUM_Kinds]; + static jint _numDoOopCalls_nv[NUM_Kinds]; public: #endif static void clear() PRODUCT_RETURN; @@ -203,22 +221,22 @@ #if ENABLE_SPECIALIZATION_STATS inline void SpecializationStats::record_call() { - _numCallsAll++;; + Atomic::inc(&_numCallsAll); } inline void SpecializationStats::record_iterate_call_v(Kind k) { - _numCallsTotal[k]++; + Atomic::inc(&_numCallsTotal[k]); } inline void SpecializationStats::record_iterate_call_nv(Kind k) { - _numCallsTotal[k]++; - _numCalls_nv[k]++; + Atomic::inc(&_numCallsTotal[k]); + Atomic::inc(&_numCalls_nv[k]); } inline void SpecializationStats::record_do_oop_call_v(Kind k) { - _numDoOopCallsTotal[k]++; + Atomic::inc(&_numDoOopCallsTotal[k]); } inline void SpecializationStats::record_do_oop_call_nv(Kind k) { - _numDoOopCallsTotal[k]++; - _numDoOopCalls_nv[k]++; + Atomic::inc(&_numDoOopCallsTotal[k]); + Atomic::inc(&_numDoOopCalls_nv[k]); } #else // !ENABLE_SPECIALIZATION_STATS diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/memory/universe.cpp --- a/src/share/vm/memory/universe.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/memory/universe.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -734,6 +734,15 @@ fatal("UseParallelGC not supported in java kernel vm."); #endif // SERIALGC + } else if (UseG1GC) { +#ifndef SERIALGC + G1CollectorPolicy* g1p = new G1CollectorPolicy_BestRegionsFirst(); + G1CollectedHeap* g1h = new G1CollectedHeap(g1p); + Universe::_collectedHeap = g1h; +#else // SERIALGC + fatal("UseG1GC not supported in java kernel vm."); +#endif // SERIALGC + } else { GenCollectorPolicy *gc_policy; @@ -933,7 +942,10 @@ // This needs to be done before the first scavenge/gc, since // it's an input to soft ref clearing policy. 
- Universe::update_heap_info_at_gc(); + { + MutexLocker x(Heap_lock); + Universe::update_heap_info_at_gc(); + } // ("weak") refs processing infrastructure initialization Universe::heap()->post_initialize(); @@ -1189,10 +1201,11 @@ // ???: What if a CollectedHeap doesn't have a permanent generation? ShouldNotReachHere(); break; - case CollectedHeap::GenCollectedHeap: { - GenCollectedHeap* gch = (GenCollectedHeap*) Universe::heap(); - permanent_reserved = gch->perm_gen()->reserved(); - break; + case CollectedHeap::GenCollectedHeap: + case CollectedHeap::G1CollectedHeap: { + SharedHeap* sh = (SharedHeap*) Universe::heap(); + permanent_reserved = sh->perm_gen()->reserved(); + break; } #ifndef SERIALGC case CollectedHeap::ParallelScavengeHeap: { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/generateOopMap.cpp --- a/src/share/vm/oops/generateOopMap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/generateOopMap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -370,21 +370,8 @@ void GenerateOopMap ::initialize_bb() { _gc_points = 0; _bb_count = 0; - int size = binsToHold(method()->code_size()); - _bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t,size); - memset(_bb_hdr_bits, 0, size*sizeof(uintptr_t)); -} - -void GenerateOopMap ::set_bbmark_bit(int bci) { - int idx = bci >> LogBitsPerWord; - uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1)); - _bb_hdr_bits[idx] |= bit; -} - -void GenerateOopMap ::clear_bbmark_bit(int bci) { - int idx = bci >> LogBitsPerWord; - uintptr_t bit = (uintptr_t)1 << (bci & (BitsPerWord-1)); - _bb_hdr_bits[idx] &= (~bit); + _bb_hdr_bits.clear(); + _bb_hdr_bits.resize(method()->code_size()); } void GenerateOopMap::bb_mark_fct(GenerateOopMap *c, int bci, int *data) { @@ -952,6 +939,17 @@ _basic_blocks[bbNo-1]._end_bci = prev_bci; + // Check that the correct number of basicblocks was found + if (bbNo !=_bb_count) { + if (bbNo < _bb_count) { + verify_error("jump into the middle of instruction?"); + return; + } else { + verify_error("extra basic blocks - should not happen?"); + return; + } + } + _max_monitors = monitor_count; // Now that we have a bound on the depth of the monitor stack, we can @@ -985,17 +983,6 @@ } #endif - // Check that the correct number of basicblocks was found - if (bbNo !=_bb_count) { - if (bbNo < _bb_count) { - verify_error("jump into the middle of instruction?"); - return; - } else { - verify_error("extra basic blocks - should not happen?"); - return; - } - } - // Mark all alive blocks mark_reachable_code(); } @@ -1022,21 +1009,22 @@ int new_method_size) { assert(new_method_size >= method()->code_size() + delta, "new method size is too small"); - int newWords = binsToHold(new_method_size); - uintptr_t * new_bb_hdr_bits = NEW_RESOURCE_ARRAY(uintptr_t, newWords); + BitMap::bm_word_t* new_bb_hdr_bits = + NEW_RESOURCE_ARRAY(BitMap::bm_word_t, + BitMap::word_align_up(new_method_size)); + _bb_hdr_bits.set_map(new_bb_hdr_bits); + _bb_hdr_bits.set_size(new_method_size); + _bb_hdr_bits.clear(); - BitMap bb_bits(new_bb_hdr_bits, new_method_size); - bb_bits.clear(); for(int k = 0; k < _bb_count; k++) { if (_basic_blocks[k]._bci > bci) { _basic_blocks[k]._bci += delta; _basic_blocks[k]._end_bci += delta; } - bb_bits.at_put(_basic_blocks[k]._bci, true); + _bb_hdr_bits.at_put(_basic_blocks[k]._bci, true); } - _bb_hdr_bits = new_bb_hdr_bits ; } // diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/generateOopMap.hpp --- a/src/share/vm/oops/generateOopMap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/generateOopMap.hpp Thu Jun 05 15:57:56 
2008 -0700 @@ -341,16 +341,22 @@ BasicBlock * _basic_blocks; // Array of basicblock info int _gc_points; int _bb_count; - uintptr_t * _bb_hdr_bits; + BitMap _bb_hdr_bits; // Basicblocks methods void initialize_bb (); void mark_bbheaders_and_count_gc_points(); - bool is_bb_header (int bci) const { return (_bb_hdr_bits[bci >> LogBitsPerWord] & ((uintptr_t)1 << (bci & (BitsPerWord-1)))) != 0; } + bool is_bb_header (int bci) const { + return _bb_hdr_bits.at(bci); + } int gc_points () const { return _gc_points; } int bb_count () const { return _bb_count; } - void set_bbmark_bit (int bci); - void clear_bbmark_bit (int bci); + void set_bbmark_bit (int bci) { + _bb_hdr_bits.at_put(bci, true); + } + void clear_bbmark_bit (int bci) { + _bb_hdr_bits.at_put(bci, false); + } BasicBlock * get_basic_block_at (int bci) const; BasicBlock * get_basic_block_containing (int bci) const; void interp_bb (BasicBlock *bb); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1515,10 +1515,9 @@ // closure's do_header() method dicates whether the given closure should be // applied to the klass ptr in the object header. -#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ - \ -int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, \ - OopClosureType* closure) {\ +#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) { \ SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik);\ /* header */ \ if (closure->do_header()) { \ @@ -1533,6 +1532,26 @@ return size_helper(); \ } +#ifndef SERIALGC +#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, \ + OopClosureType* closure) { \ + SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::ik); \ + /* header */ \ + if (closure->do_header()) { \ + obj->oop_iterate_header(closure); \ + } \ + /* instance variables */ \ + InstanceKlass_OOP_MAP_REVERSE_ITERATE( \ + obj, \ + SpecializationStats::record_do_oop_call##nv_suffix(SpecializationStats::ik);\ + (closure)->do_oop##nv_suffix(p), \ + assert_is_in_closed_subset) \ + return size_helper(); \ +} +#endif // !SERIALGC + #define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ \ int instanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj, \ @@ -1550,9 +1569,13 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN) ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DEFN_m) +#ifndef SERIALGC +ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +#endif // !SERIALGC void instanceKlass::iterate_static_fields(OopClosure* closure) { InstanceKlass_OOP_ITERATE( \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceKlass.hpp --- a/src/share/vm/oops/instanceKlass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceKlass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -656,13 +656,21 @@ return 
oop_oop_iterate_v_m(obj, blk, mr); } -#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ - int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ - int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, \ +#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk); \ + int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, \ MemRegion mr); ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC void iterate_static_fields(OopClosure* closure); void iterate_static_fields(OopClosure* closure, MemRegion mr); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceRefKlass.cpp --- a/src/share/vm/oops/instanceRefKlass.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceRefKlass.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -176,6 +176,11 @@ } #define InstanceRefKlass_SPECIALIZED_OOP_ITERATE(T, nv_suffix, contains) \ + if (closure->apply_to_weak_ref_discovered_field()) { \ + T* disc_addr = (T*)java_lang_ref_Reference::discovered_addr(obj); \ + closure->do_oop##nv_suffix(disc_addr); \ + } \ + \ T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj); \ oop referent = oopDesc::load_decode_heap_oop(referent_addr); \ if (referent != NULL && contains(referent_addr)) { \ @@ -219,6 +224,25 @@ } \ } +#ifndef SERIALGC +#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +int instanceRefKlass:: \ +oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) { \ + /* Get size before changing pointers */ \ + SpecializationStats::record_iterate_call##nv_suffix(SpecializationStats::irk);\ + \ + int size = instanceKlass::oop_oop_iterate_backwards##nv_suffix(obj, closure); \ + \ + if (UseCompressedOops) { \ + InstanceRefKlass_SPECIALIZED_OOP_ITERATE(narrowOop, nv_suffix, contains); \ + } else { \ + InstanceRefKlass_SPECIALIZED_OOP_ITERATE(oop, nv_suffix, contains); \ + } \ +} +#endif // !SERIALGC + + #define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix) \ \ int instanceRefKlass:: \ @@ -236,9 +260,13 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN) +#ifndef SERIALGC +ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN) +#endif // SERIALGC ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m) #ifndef SERIALGC template @@ -423,7 +451,7 @@ // Verify next field oop next = java_lang_ref_Reference::next(obj); if (next != NULL) { - guarantee(next->is_oop(), "next field verify fa iled"); + guarantee(next->is_oop(), "next field verify failed"); guarantee(next->is_instanceRef(), 
"next field verify failed"); if (gch != NULL && !gch->is_in_youngest(obj)) { // We do a specific remembered set check here since the next field is diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/instanceRefKlass.hpp --- a/src/share/vm/oops/instanceRefKlass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/instanceRefKlass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -72,7 +72,15 @@ int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr); ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(InstanceRefKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock); static void acquire_pending_list_lock(BasicLock *pending_list_basic_lock); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/klass.hpp --- a/src/share/vm/oops/klass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/klass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -134,14 +134,14 @@ // Every subclass on which vtbl_value is called must include this macro. // Delay the installation of the klassKlass pointer until after the // the vtable for a new klass has been installed (after the call to new()). -#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \ +#define DEFINE_ALLOCATE_PERMANENT(thisKlass) \ void* allocate_permanent(KlassHandle& klass_klass, int size, TRAPS) const { \ - void* result = new(klass_klass, size, THREAD) thisKlass(); \ - if (HAS_PENDING_EXCEPTION) return NULL; \ - klassOop new_klass = ((Klass*) result)->as_klassOop(); \ - OrderAccess::storestore(); \ - post_new_init_klass(klass_klass, new_klass, size); \ - return result; \ + void* result = new(klass_klass, size, THREAD) thisKlass(); \ + if (HAS_PENDING_EXCEPTION) return NULL; \ + klassOop new_klass = ((Klass*) result)->as_klassOop(); \ + OrderAccess::storestore(); \ + post_new_init_klass(klass_klass, new_klass, size); \ + return result; \ } bool null_vtbl() { return *(intptr_t*)this == 0; } @@ -694,6 +694,14 @@ return oop_oop_iterate(obj, blk); } +#ifndef SERIALGC + // In case we don't have a specialized backward scanner use forward + // iteration. + virtual int oop_oop_iterate_backwards_v(oop obj, OopClosure* blk) { + return oop_oop_iterate_v(obj, blk); + } +#endif // !SERIALGC + // Iterates "blk" over all the oops in "obj" (of type "this") within "mr". // (I don't see why the _m should be required, but without it the Solaris // C++ gives warning messages about overridings of the "oop_oop_iterate" @@ -722,7 +730,19 @@ } SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL) - SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_3(Klass_OOP_OOP_ITERATE_DECL) + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL) + +#ifndef SERIALGC +#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + virtual int oop_oop_iterate_backwards##nv_suffix(oop obj, \ + OopClosureType* blk) { \ + /* Default implementation reverts to general version. 
*/ \ + return oop_oop_iterate_backwards_v(obj, blk); \ + } + + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) + SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL) +#endif // !SERIALGC virtual void array_klasses_do(void f(klassOop k)) {} virtual void with_array_klasses_do(void f(klassOop k)); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/markOop.hpp --- a/src/share/vm/oops/markOop.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/markOop.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -216,11 +216,7 @@ static markOop INFLATING() { return (markOop) 0; } // inflate-in-progress // Should this header be preserved during GC? - bool must_be_preserved(oop obj_containing_mark) const { - if (!UseBiasedLocking) - return (!is_unlocked() || !has_no_hash()); - return must_be_preserved_with_bias(obj_containing_mark); - } + inline bool must_be_preserved(oop obj_containing_mark) const; inline bool must_be_preserved_with_bias(oop obj_containing_mark) const; // Should this header (including its age bits) be preserved in the @@ -240,22 +236,14 @@ // observation is that promotion failures are quite rare and // reducing the number of mark words preserved during them isn't a // high priority. - bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const { - if (!UseBiasedLocking) - return (this != prototype()); - return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark); - } + inline bool must_be_preserved_for_promotion_failure(oop obj_containing_mark) const; inline bool must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const; // Should this header be preserved during a scavenge where CMS is // the old generation? // (This is basically the same body as must_be_preserved_for_promotion_failure(), // but takes the klassOop as argument instead) - bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { - if (!UseBiasedLocking) - return (this != prototype()); - return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark); - } + inline bool must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const; inline bool must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const; // WARNING: The following routines are used EXCLUSIVELY by diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/markOop.inline.hpp --- a/src/share/vm/oops/markOop.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/markOop.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -39,6 +39,12 @@ return (!is_unlocked() || !has_no_hash()); } +inline bool markOopDesc::must_be_preserved(oop obj_containing_mark) const { + if (!UseBiasedLocking) + return (!is_unlocked() || !has_no_hash()); + return must_be_preserved_with_bias(obj_containing_mark); +} + // Should this header (including its age bits) be preserved in the // case of a promotion failure during scavenge? inline bool markOopDesc::must_be_preserved_with_bias_for_promotion_failure(oop obj_containing_mark) const { @@ -59,6 +65,13 @@ return (this != prototype()); } +inline bool markOopDesc::must_be_preserved_for_promotion_failure(oop obj_containing_mark) const { + if (!UseBiasedLocking) + return (this != prototype()); + return must_be_preserved_with_bias_for_promotion_failure(obj_containing_mark); +} + + // Should this header (including its age bits) be preserved in the // case of a scavenge in which CMS is the old generation? 
inline bool markOopDesc::must_be_preserved_with_bias_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { @@ -70,6 +83,11 @@ } return (this != prototype()); } +inline bool markOopDesc::must_be_preserved_for_cms_scavenge(klassOop klass_of_obj_containing_mark) const { + if (!UseBiasedLocking) + return (this != prototype()); + return must_be_preserved_with_bias_for_cms_scavenge(klass_of_obj_containing_mark); +} inline markOop markOopDesc::prototype_for_object(oop obj) { #ifdef ASSERT diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayKlass.cpp --- a/src/share/vm/oops/objArrayKlass.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayKlass.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -86,14 +86,18 @@ const size_t word_len = objArrayOopDesc::array_size(length); - // For performance reasons, we assume we are using a card marking write - // barrier. The assert will fail if this is not the case. BarrierSet* bs = Universe::heap()->barrier_set(); + // For performance reasons, we assume we are that the write barrier we + // are using has optimized modes for arrays of references. At least one + // of the asserts below will fail if this is not the case. assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt"); + assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well."); + MemRegion dst_mr = MemRegion((HeapWord*)dst, word_len); if (s == d) { // since source and destination are equal we do not need conversion checks. assert(length > 0, "sanity check"); + bs->write_ref_array_pre(dst_mr); Copy::conjoint_oops_atomic(src, dst, length); } else { // We have to make sure all elements conform to the destination array @@ -101,6 +105,7 @@ klassOop stype = objArrayKlass::cast(s->klass())->element_klass(); if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) { // elements are guaranteed to be subtypes, so no check necessary + bs->write_ref_array_pre(dst_mr); Copy::conjoint_oops_atomic(src, dst, length); } else { // slow case: need individual subtype checks @@ -110,8 +115,13 @@ for (T* p = dst; from < end; from++, p++) { // XXX this is going to be slow. T element = *from; - if (oopDesc::is_null(element) || - Klass::cast(oopDesc::decode_heap_oop_not_null(element)->klass())->is_subtype_of(bound)) { + // even slower now + bool element_is_null = oopDesc::is_null(element); + oop new_val = element_is_null ? oop(NULL) + : oopDesc::decode_heap_oop_not_null(element); + if (element_is_null || + Klass::cast((new_val->klass()))->is_subtype_of(bound)) { + bs->write_ref_field_pre(p, new_val); *p = *from; } else { // We must do a barrier to cover the partial copy. 
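The objArrayKlass::do_copy hunk above issues the barrier set's pre-write barrier before destination slots are overwritten: write_ref_array_pre over the whole region on the fast paths, and write_ref_field_pre per slot in the checked slow path. A simplified sketch of that slow-path shape, with a toy log standing in for the SATB machinery and an integer type id standing in for the real subtype test:

// Sketch of a checked reference-array copy with a per-slot pre-write barrier.
// Types and hooks are illustrative only; this models the shape of the patched
// slow path, not the HotSpot API.
#include <cstddef>
#include <cstdio>
#include <vector>

struct Obj { int type_id; };

// Stand-in SATB log: old referents are recorded before being overwritten.
static std::vector<Obj*> satb_log;

static void pre_write_barrier(Obj** slot) {
  if (*slot != nullptr) satb_log.push_back(*slot);
}

// Copies src[0..len) into dst[0..len); elements must satisfy type_id <= bound.
// Returns how many elements were copied before a check failed.
static size_t checked_oop_copy(Obj** src, Obj** dst, size_t len, int bound) {
  for (size_t i = 0; i < len; i++) {
    Obj* elem = src[i];
    if (elem == nullptr || elem->type_id <= bound) {
      pre_write_barrier(&dst[i]);  // log the slot's old value first
      dst[i] = elem;               // then perform the store
    } else {
      return i;                    // caller must still cover the partial copy
    }
  }
  return len;
}

int main() {
  Obj a{1}, b{2}, c{9}, old0{5}, old1{6};
  Obj* src[3] = {&a, &b, &c};
  Obj* dst[3] = {&old0, &old1, nullptr};
  size_t copied = checked_oop_copy(src, dst, 3, 2);
  std::printf("copied %zu element(s), logged %zu old referent(s)\n",
              copied, satb_log.size());
  return 0;
}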
@@ -401,11 +411,11 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN) ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m) ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r) int objArrayKlass::oop_adjust_pointers(oop obj) { assert(obj->is_objArray(), "obj must be obj array"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayKlass.hpp --- a/src/share/vm/oops/objArrayKlass.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayKlass.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -111,7 +111,7 @@ int start, int end); ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayKlass_OOP_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL) // JVM support jint compute_modifier_flags(TRAPS) const; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayOop.cpp --- a/src/share/vm/oops/objArrayOop.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayOop.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -33,4 +33,4 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DEFN) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/objArrayOop.hpp --- a/src/share/vm/oops/objArrayOop.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/objArrayOop.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -29,6 +29,8 @@ friend class objArrayKlass; friend class Runtime1; friend class psPromotionManager; + friend class CSMarkOopClosure; + friend class G1ParScanPartialArrayClosure; template T* obj_at_addr(int index) const { assert(is_within_bounds(index), "index out of bounds"); @@ -88,5 +90,5 @@ int oop_iterate_range(OopClosureType* blk, int start, int end); ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayOop_OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(ObjArrayOop_OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayOop_OOP_ITERATE_DECL) }; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/oop.hpp --- a/src/share/vm/oops/oop.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/oop.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -358,12 +358,21 @@ static void set_bs(BarrierSet* bs) { _bs = bs; } // iterators, returns size of object -#define OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ +#define OOP_ITERATE_DECL(OopClosureType, nv_suffix) \ int oop_iterate(OopClosureType* blk); \ int oop_iterate(OopClosureType* blk, MemRegion mr); // Only in mr. 
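The oop.hpp declarations above come from applying a declaration macro to the ALL_OOP_OOP_ITERATE_CLOSURES_1 and _2 lists, which expand it once per specialized closure; the renamed _2 list and the new backwards-iteration declarations reuse the same machinery. A minimal illustration of that X-macro technique, with invented closure names rather than the real closure set:

// Tiny illustration of the "apply a macro to every closure" pattern behind
// ALL_OOP_OOP_ITERATE_CLOSURES_*; the closure names here are invented.
#include <cstdio>

struct ScanClosureA { void apply(int* p) { *p += 1; } };
struct ScanClosureB { void apply(int* p) { *p += 2; } };

// The "list" macro: applies f to each (ClosureType, suffix) pair.
#define ALL_DEMO_CLOSURES(f) \
  f(ScanClosureA, _a)        \
  f(ScanClosureB, _b)

// A "definition" macro instantiated once per closure; this generates
// iterate_a(ScanClosureA*, ...) and iterate_b(ScanClosureB*, ...).
#define DEMO_ITERATE_DEFN(ClosureType, suffix)              \
  int iterate##suffix(ClosureType* blk, int* data, int n) { \
    for (int i = 0; i < n; i++) blk->apply(&data[i]);       \
    return n;                                               \
  }

ALL_DEMO_CLOSURES(DEMO_ITERATE_DEFN)

int main() {
  int data[3] = {0, 0, 0};
  ScanClosureA a;
  ScanClosureB b;
  iterate_a(&a, data, 3);
  iterate_b(&b, data, 3);
  std::printf("%d %d %d\n", data[0], data[1], data[2]);  // prints: 3 3 3
  return 0;
}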
ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DECL) - ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DECL) + +#ifndef SERIALGC + +#define OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \ + int oop_iterate_backwards(OopClosureType* blk); + + ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DECL) + ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DECL) +#endif void oop_iterate_header(OopClosure* blk); void oop_iterate_header(OopClosure* blk, MemRegion mr); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/oops/oop.inline.hpp --- a/src/share/vm/oops/oop.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/oops/oop.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -380,10 +380,11 @@ s = (int)((size_t)round_to(size_in_bytes, MinObjAlignmentInBytes) / HeapWordSize); - // UseParNewGC can change the length field of an "old copy" of an object - // array in the young gen so it indicates the stealable portion of - // an already copied array. This will cause the first disjunct below - // to fail if the sizes are computed across such a concurrent change. + // UseParNewGC, UseParallelGC and UseG1GC can change the length field + // of an "old copy" of an object array in the young gen so it indicates + // the grey portion of an already copied array. This will cause the first + // disjunct below to fail if the two comparands are computed across such + // a concurrent change. // UseParNewGC also runs with promotion labs (which look like int // filler arrays) which are subject to changing their declared size // when finally retiring a PLAB; this also can cause the first disjunct @@ -393,13 +394,11 @@ // is_objArray() && is_forwarded() // covers first scenario above // || is_typeArray() // covers second scenario above // If and when UseParallelGC uses the same obj array oop stealing/chunking - // technique, or when G1 is integrated (and currently uses this array chunking - // technique) we will need to suitably modify the assertion. + // technique, we will need to suitably modify the assertion. assert((s == klass->oop_size(this)) || - (((UseParNewGC || UseParallelGC) && - Universe::heap()->is_gc_active()) && - (is_typeArray() || - (is_objArray() && is_forwarded()))), + (Universe::heap()->is_gc_active() && + ((is_typeArray() && UseParNewGC) || + (is_objArray() && is_forwarded() && (UseParNewGC || UseParallelGC || UseG1GC)))), "wrong array object size"); } else { // Must be zero, so bite the bullet and take the virtual call. @@ -426,16 +425,22 @@ oopDesc::bs()->write_ref_field(p, v); } +inline void update_barrier_set_pre(void* p, oop v) { + oopDesc::bs()->write_ref_field_pre(p, v); +} + template inline void oop_store(T* p, oop v) { if (always_do_update_barrier) { oop_store((volatile T*)p, v); } else { + update_barrier_set_pre(p, v); oopDesc::encode_store_heap_oop(p, v); update_barrier_set(p, v); } } template inline void oop_store(volatile T* p, oop v) { + update_barrier_set_pre((void*)p, v); // Used by release_obj_field_put, so use release_store_ptr. 
oopDesc::release_encode_store_heap_oop(p, v); update_barrier_set((void*)p, v); @@ -683,8 +688,19 @@ } ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_DEFN) -ALL_OOP_OOP_ITERATE_CLOSURES_3(OOP_ITERATE_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_DEFN) +#ifndef SERIALGC +#define OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix) \ + \ +inline int oopDesc::oop_iterate_backwards(OopClosureType* blk) { \ + SpecializationStats::record_call(); \ + return blueprint()->oop_oop_iterate_backwards##nv_suffix(this, blk); \ +} + +ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_ITERATE_BACKWARDS_DEFN) +ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_ITERATE_BACKWARDS_DEFN) +#endif // !SERIALGC inline bool oopDesc::is_shared() const { return CompactingPermGenGen::is_shared(this); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/graphKit.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1180,6 +1180,12 @@ else reason = Deoptimization::Reason_div0_check; + // %%% Since Reason_unhandled is not recorded on a per-bytecode basis, + // ciMethodData::has_trap_at will return a conservative -1 if any + // must-be-null assertion has failed. This could cause performance + // problems for a method after its first do_null_assert failure. + // Consider using 'Reason_class_check' instead? + // To cause an implicit null check, we set the not-null probability // to the maximum (PROB_MAX). For an explicit check the probablity // is set to a smaller value. @@ -1366,6 +1372,10 @@ BarrierSet* bs = Universe::heap()->barrier_set(); set_control(ctl); switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt); + break; case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -1390,6 +1400,10 @@ BarrierSet* bs = Universe::heap()->barrier_set(); set_control(ctl); switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise); + break; case BarrierSet::CardTableModRef: case BarrierSet::CardTableExtension: @@ -3176,3 +3190,251 @@ } return NULL; } + +void GraphKit::g1_write_barrier_pre(Node* obj, + Node* adr, + uint alias_idx, + Node* val, + const Type* val_type, + BasicType bt) { + IdealKit ideal(gvn(), control(), merged_memory(), true); +#define __ ideal. + __ declares_done(); + + Node* thread = __ thread(); + + Node* no_ctrl = NULL; + Node* no_base = __ top(); + Node* zero = __ ConI(0); + + float likely = PROB_LIKELY(0.999); + float unlikely = PROB_UNLIKELY(0.999); + + BasicType active_type = in_bytes(PtrQueue::byte_width_of_active()) == 4 ? 
T_INT : T_BYTE; + assert(in_bytes(PtrQueue::byte_width_of_active()) == 4 || in_bytes(PtrQueue::byte_width_of_active()) == 1, "flag width"); + + // Offsets into the thread + const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 648 + PtrQueue::byte_offset_of_active()); + const int index_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 656 + PtrQueue::byte_offset_of_index()); + const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + // 652 + PtrQueue::byte_offset_of_buf()); + // Now the actual pointers into the thread + + // set_control( ctl); + + Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset)); + Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + + // Now some of the values + + Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); + Node* index = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); + Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + // if (!marking) + __ if_then(marking, BoolTest::ne, zero); { + + const Type* t1 = adr->bottom_type(); + const Type* t2 = val->bottom_type(); + + Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx); + // if (orig != NULL) + __ if_then(orig, BoolTest::ne, null()); { + + // load original value + // alias_idx correct?? + + // is the queue for this thread full? + __ if_then(index, BoolTest::ne, zero, likely); { + + // decrement the index + Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); + Node* next_indexX = next_index; +#ifdef _LP64 + // We could refine the type for what it's worth + // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); + next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); +#endif // _LP64 + + // Now get the buffer location we will log the original value into and store it + + Node *log_addr = __ AddP(no_base, buffer, next_indexX); + // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM)); + __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw); + + + // update the index + // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); + // This is a hack to force this store to occur before the oop store that is coming up + __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM)); + + } __ else_(); { + + // logging buffer is full, call the runtime + const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type(); + // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread); + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread); + } __ end_if(); + } __ end_if(); + } __ end_if(); + + __ drain_delay_transform(); + set_control( __ ctrl()); + set_all_memory( __ merged_memory()); + +#undef __ +} + +// +// Update the card table and add card address to the queue +// +void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) { +#define __ ideal-> + Node* zero = __ ConI(0); + Node* no_base = __ top(); + BasicType card_bt = T_BYTE; + // Smash zero into card. 
MUST BE ORDERED WRT TO STORE + __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw); + + // Now do the queue work + __ if_then(index, BoolTest::ne, zero); { + + Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); + Node* next_indexX = next_index; +#ifdef _LP64 + // We could refine the type for what it's worth + // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); + next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); +#endif // _LP64 + Node* log_addr = __ AddP(no_base, buffer, next_indexX); + + __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw); + __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); + + } __ else_(); { + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread()); + } __ end_if(); +#undef __ +} + +void GraphKit::g1_write_barrier_post(Node* store, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + BasicType bt, + bool use_precise) { + // If we are writing a NULL then we need no post barrier + + if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) { + // Must be NULL + const Type* t = val->bottom_type(); + assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL"); + // No post barrier if writing NULLx + return; + } + + if (!use_precise) { + // All card marks for a (non-array) instance are in one place: + adr = obj; + } + // (Else it's an array (or unknown), and we want more precise card marks.) + assert(adr != NULL, ""); + + IdealKit ideal(gvn(), control(), merged_memory(), true); +#define __ ideal. + __ declares_done(); + + Node* thread = __ thread(); + + Node* no_ctrl = NULL; + Node* no_base = __ top(); + float likely = PROB_LIKELY(0.999); + float unlikely = PROB_UNLIKELY(0.999); + Node* zero = __ ConI(0); + Node* zeroX = __ ConX(0); + + // Get the alias_index for raw card-mark memory + const TypePtr* card_type = TypeRawPtr::BOTTOM; + + const TypeFunc *tf = OptoRuntime::g1_wb_post_Type(); + + // Get the address of the card table + CardTableModRefBS* ct = + (CardTableModRefBS*)(Universe::heap()->barrier_set()); + Node *card_table = __ makecon(TypeRawPtr::make((address)ct->byte_map_base)); + // Get base of card map + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + + // Offsets into the thread + const int index_offset = in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + const int buffer_offset = in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + + // Pointers into the thread + + Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + + // Now some values + + Node* index = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); + Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + + // Convert the store obj pointer to an int prior to doing math on it + // Use addr not obj gets accurate card marks + + // Node* cast = __ CastPX(no_ctrl, adr /* obj */); + + // Must use ctrl to prevent "integerized oop" existing across safepoint + Node* cast = __ CastPX(__ ctrl(), ( use_precise ? 
adr : obj )); + + // Divide pointer by card size + Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) ); + + // Combine card table base and card offset + Node *card_adr = __ AddP(no_base, card_table, card_offset ); + + // If we know the value being stored does it cross regions? + + if (val != NULL) { + // Does the store cause us to cross regions? + + // Should be able to do an unsigned compare of region_size instead of + // and extra shift. Do we have an unsigned compare?? + // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes); + Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes)); + + // if (xor_res == 0) same region so skip + __ if_then(xor_res, BoolTest::ne, zeroX); { + + // No barrier if we are storing a NULL + __ if_then(val, BoolTest::ne, null(), unlikely); { + + // Ok must mark the card if not already dirty + + // load the original value of the card + Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + + __ if_then(card_val, BoolTest::ne, zero); { + g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + } __ end_if(); + } __ end_if(); + } __ end_if(); + } else { + g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + } + + + __ drain_delay_transform(); + set_control( __ ctrl()); + set_all_memory( __ merged_memory()); +#undef __ + +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/graphKit.hpp --- a/src/share/vm/opto/graphKit.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/graphKit.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -24,6 +24,7 @@ class FastLockNode; class FastUnlockNode; +class IdealKit; class Parse; class RootNode; @@ -581,6 +582,27 @@ && Universe::heap()->can_elide_tlab_store_barriers()); } + // G1 pre/post barriers + void g1_write_barrier_pre(Node* obj, + Node* adr, + uint alias_idx, + Node* val, + const Type* val_type, + BasicType bt); + + void g1_write_barrier_post(Node* store, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + BasicType bt, + bool use_precise); + // Helper function for g1 + private: + void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, + Node* buffer, const TypeFunc* tf); + + public: // Helper function to round double arguments before a call void round_double_arguments(ciMethod* dest_method); void round_double_result(ciMethod* dest_method); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/lcm.cpp --- a/src/share/vm/opto/lcm.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/lcm.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -585,7 +585,7 @@ // A few node types require changing a required edge to a precedence edge // before allocation. - if( UseConcMarkSweepGC ) { + if( UseConcMarkSweepGC || UseG1GC ) { if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) { // Note: Required edges with an index greater than oper_input_base // are not supported by the allocator. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/macro.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -923,21 +923,7 @@ Node* eden_end_adr; set_eden_pointers(eden_top_adr, eden_end_adr); - uint raw_idx = C->get_alias_index(TypeRawPtr::BOTTOM); assert(ctrl != NULL, "must have control"); - - // Load Eden::end. Loop invariant and hoisted. 
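The generated g1_write_barrier_post above filters out stores that cannot create a cross-region reference before it dirties a card and enqueues the card address: NULL stores, stores within one heap region, and cards that are already dirty are all skipped. A compact model of those filters, with made-up region and card sizes and a plain vector standing in for the dirty card queue:

// Sketch of the post-barrier filtering: skip NULL stores, skip same-region
// stores, skip already-dirty cards, otherwise dirty the card and enqueue it.
// Region size, card size and the queue are illustrative stand-ins.
#include <cstdint>
#include <cstdio>
#include <vector>

static const int kLogRegionBytes = 20;        // pretend 1 MB heap regions
static const int kCardShift      = 9;         // 512-byte cards
static const uint8_t kDirty = 0;              // zero, like the storeCM of zero above
static const uint8_t kClean = 1;

static uint8_t card_table[1 << 16];
static std::vector<uintptr_t> dirty_card_queue;

static void post_barrier(const void* field_addr, const void* new_val) {
  if (new_val == nullptr) return;                  // storing NULL: no barrier
  uintptr_t f = (uintptr_t)field_addr;
  uintptr_t v = (uintptr_t)new_val;
  if (((f ^ v) >> kLogRegionBytes) == 0) return;   // same region: skip
  uintptr_t card = (f >> kCardShift) % (1 << 16);
  if (card_table[card] == kDirty) return;          // already dirty: skip
  card_table[card] = kDirty;                       // mark the card
  dirty_card_queue.push_back(card);                // hand it to refinement
}

int main() {
  for (int i = 0; i < (1 << 16); i++) card_table[i] = kClean;
  // Field and value in different pretend regions: the card gets enqueued.
  post_barrier((void*)(uintptr_t)0x100200, (void*)(uintptr_t)0x300400);
  // Field and value in the same pretend region: filtered out.
  post_barrier((void*)(uintptr_t)0x100200, (void*)(uintptr_t)0x100800);
  std::printf("cards enqueued: %zu\n", dirty_card_queue.size());  // prints: 1
  return 0;
}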
- // - // Note: We set the control input on "eden_end" and "old_eden_top" when using - // a TLAB to work around a bug where these values were being moved across - // a safepoint. These are not oops, so they cannot be include in the oop - // map, but the can be changed by a GC. The proper way to fix this would - // be to set the raw memory state when generating a SafepointNode. However - // this will require extensive changes to the loop optimization in order to - // prevent a degradation of the optimization. - // See comment in memnode.hpp, around line 227 in class LoadPNode. - Node* eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS); - // We need a Region and corresponding Phi's to merge the slow-path and fast-path results. // they will not be used if "always_slow" is set enum { slow_result_path = 1, fast_result_path = 2 }; @@ -957,12 +943,15 @@ initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn); } - if (DTraceAllocProbes) { + if (DTraceAllocProbes || + !UseTLAB && (!Universe::heap()->supports_inline_contig_alloc() || + (UseConcMarkSweepGC && CMSIncrementalMode))) { // Force slow-path allocation always_slow = true; initial_slow_test = NULL; } + enum { too_big_or_final_path = 1, need_gc_path = 2 }; Node *slow_region = NULL; Node *toobig_false = ctrl; @@ -991,6 +980,23 @@ Node *slow_mem = mem; // save the current memory state for slow path // generate the fast allocation code unless we know that the initial test will always go slow if (!always_slow) { + Node* eden_top_adr; + Node* eden_end_adr; + + set_eden_pointers(eden_top_adr, eden_end_adr); + + // Load Eden::end. Loop invariant and hoisted. + // + // Note: We set the control input on "eden_end" and "old_eden_top" when using + // a TLAB to work around a bug where these values were being moved across + // a safepoint. These are not oops, so they cannot be include in the oop + // map, but the can be changed by a GC. The proper way to fix this would + // be to set the raw memory state when generating a SafepointNode. However + // this will require extensive changes to the loop optimization in order to + // prevent a degradation of the optimization. + // See comment in memnode.hpp, around line 227 in class LoadPNode. 
+ Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS); + // allocate the Region and Phi nodes for the result result_region = new (C, 3) RegionNode(3); result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM ); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/runtime.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -44,6 +44,8 @@ address OptoRuntime::_multianewarray3_Java = NULL; address OptoRuntime::_multianewarray4_Java = NULL; address OptoRuntime::_multianewarray5_Java = NULL; +address OptoRuntime::_g1_wb_pre_Java = NULL; +address OptoRuntime::_g1_wb_post_Java = NULL; address OptoRuntime::_vtable_must_compile_Java = NULL; address OptoRuntime::_complete_monitor_locking_Java = NULL; address OptoRuntime::_rethrow_Java = NULL; @@ -89,6 +91,8 @@ gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false); gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false); gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false); + gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false); + gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false); gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C , 0 , false, false, false); gen(env, _rethrow_Java , rethrow_Type , rethrow_C , 2 , true , false, true ); @@ -385,6 +389,33 @@ return multianewarray_Type(5); } +const TypeFunc *OptoRuntime::g1_wb_pre_Type() { + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); + + return TypeFunc::make(domain, range); +} + +const TypeFunc *OptoRuntime::g1_wb_post_Type() { + + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); + + return TypeFunc::make(domain, range); +} + const TypeFunc *OptoRuntime::uncommon_trap_Type() { // create input type (domain) const Type **fields = TypeTuple::fields(1); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/opto/runtime.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -108,6 +108,8 @@ static address _multianewarray3_Java; static address _multianewarray4_Java; static address _multianewarray5_Java; + static address _g1_wb_pre_Java; + static address _g1_wb_post_Java; static address _vtable_must_compile_Java; static address _complete_monitor_locking_Java; static address _rethrow_Java; @@ -140,6 +142,8 @@ static void multianewarray3_C(klassOopDesc* klass, int len1, int len2, int len3, JavaThread *thread); static void multianewarray4_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, JavaThread *thread); static 
void multianewarray5_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread); + static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread); + static void g1_wb_post_C(void* card_addr, JavaThread* thread); public: // Slow-path Locking and Unlocking @@ -195,6 +199,8 @@ static address multianewarray3_Java() { return _multianewarray3_Java; } static address multianewarray4_Java() { return _multianewarray4_Java; } static address multianewarray5_Java() { return _multianewarray5_Java; } + static address g1_wb_pre_Java() { return _g1_wb_pre_Java; } + static address g1_wb_post_Java() { return _g1_wb_post_Java; } static address vtable_must_compile_stub() { return _vtable_must_compile_Java; } static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; } @@ -232,6 +238,8 @@ static const TypeFunc* multianewarray3_Type(); // multianewarray static const TypeFunc* multianewarray4_Type(); // multianewarray static const TypeFunc* multianewarray5_Type(); // multianewarray + static const TypeFunc* g1_wb_pre_Type(); + static const TypeFunc* g1_wb_post_Type(); static const TypeFunc* complete_monitor_enter_Type(); static const TypeFunc* complete_monitor_exit_Type(); static const TypeFunc* uncommon_trap_Type(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/jvm.cpp --- a/src/share/vm/prims/jvm.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/jvm.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -377,7 +377,11 @@ JVM_ENTRY_NO_ENV(jlong, JVM_FreeMemory(void)) JVMWrapper("JVM_FreeMemory"); CollectedHeap* ch = Universe::heap(); - size_t n = ch->capacity() - ch->used(); + size_t n; + { + MutexLocker x(Heap_lock); + n = ch->capacity() - ch->used(); + } return convert_size_t_to_jlong(n); JVM_END diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/jvmtiExport.cpp --- a/src/share/vm/prims/jvmtiExport.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/jvmtiExport.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -2433,18 +2433,7 @@ // so we record the number of collections so that it can be checked in // the destructor. if (!_full) { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - assert(gch->n_gens() == 2, "configuration not recognized"); - _invocation_count = (unsigned int)gch->get_gen(1)->stat_record()->invocations; - } else { -#ifndef SERIALGC - assert(Universe::heap()->kind() == CollectedHeap::ParallelScavengeHeap, "checking"); - _invocation_count = PSMarkSweep::total_invocations(); -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC - } + _invocation_count = Universe::heap()->total_full_collections(); } // Do clean up tasks that need to be done at a safepoint @@ -2466,20 +2455,7 @@ // generation but could have ended up doing a "full" GC - check the // GC count to see. 
if (!_full) { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - if (_invocation_count != (unsigned int)gch->get_gen(1)->stat_record()->invocations) { - _full = true; - } - } else { -#ifndef SERIALGC - if (_invocation_count != PSMarkSweep::total_invocations()) { - _full = true; - } -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC - } + _full = (_invocation_count != Universe::heap()->total_full_collections()); } // Full collection probably means the perm generation has been GC'ed diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/jvmtiTagMap.cpp --- a/src/share/vm/prims/jvmtiTagMap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/jvmtiTagMap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -400,16 +400,28 @@ // get the memory region used for the young generation void JvmtiTagMap::get_young_generation() { - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - GenCollectedHeap* gch = GenCollectedHeap::heap(); - _young_gen = gch->get_gen(0)->reserved(); - } else { + CollectedHeap* ch = Universe::heap(); + switch (ch->kind()) { + case (CollectedHeap::GenCollectedHeap): { + _young_gen = ((GenCollectedHeap*)ch)->get_gen(0)->reserved(); + break; + } #ifndef SERIALGC - ParallelScavengeHeap* psh = ParallelScavengeHeap::heap(); - _young_gen= psh->young_gen()->reserved(); -#else // SERIALGC - fatal("SerialGC only supported in this configuration."); -#endif // SERIALGC + case (CollectedHeap::ParallelScavengeHeap): { + _young_gen = ((ParallelScavengeHeap*)ch)->young_gen()->reserved(); + break; + } + case (CollectedHeap::G1CollectedHeap): { + // Until a more satisfactory solution is implemented, all + // oops in the tag map will require rehash at each gc. + // This is a correct, if extremely inefficient solution. + // See RFE 6621729 for related commentary. + _young_gen = ch->reserved_region(); + break; + } +#endif // !SERIALGC + default: + ShouldNotReachHere(); } } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/prims/unsafe.cpp --- a/src/share/vm/prims/unsafe.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/prims/unsafe.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -891,6 +891,7 @@ oop e = JNIHandles::resolve(e_h); oop p = JNIHandles::resolve(obj); HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset); + update_barrier_set_pre((void*)addr, e); oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e); jboolean success = (res == e); if (success) diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/aprofiler.hpp --- a/src/share/vm/runtime/aprofiler.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/aprofiler.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -33,6 +33,7 @@ class AllocationProfiler: AllStatic { friend class GenCollectedHeap; + friend class G1CollectedHeap; friend class MarkSweep; private: static bool _active; // tells whether profiler is active diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/arguments.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -922,18 +922,17 @@ // UseParNewGC and not explicitly set ParallelGCThreads we // set it, unless this is a single cpu machine. 
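The jvmtiExport hunks above drop the per-collector special cases and simply compare the heap's total_full_collections() counter before and after the safepoint operation, which works unchanged for G1. A small sketch of that pattern, using illustrative class names rather than the real CollectedHeap hierarchy:

// Sketch of detecting "did a full GC happen?" through one virtual counter on
// a common heap interface instead of per-collector queries. Names are invented.
#include <cstdio>

class DemoHeap {
 public:
  virtual ~DemoHeap() {}
  virtual unsigned total_full_collections() const = 0;
};

class DemoGenHeap : public DemoHeap {
  unsigned _full_gcs;
 public:
  DemoGenHeap() : _full_gcs(0) {}
  void do_full_gc() { _full_gcs++; }
  unsigned total_full_collections() const override { return _full_gcs; }
};

// Scoped helper: records the count on entry and compares on request,
// without caring which collector backs the heap.
class FullGcDetector {
  const DemoHeap& _heap;
  unsigned _count_at_start;
 public:
  explicit FullGcDetector(const DemoHeap& h)
      : _heap(h), _count_at_start(h.total_full_collections()) {}
  bool full_gc_happened() const {
    return _heap.total_full_collections() != _count_at_start;
  }
};

int main() {
  DemoGenHeap heap;
  FullGcDetector detector(heap);
  heap.do_full_gc();
  std::printf("full gc happened: %s\n",
              detector.full_gc_happened() ? "yes" : "no");  // prints: yes
  return 0;
}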
void Arguments::set_parnew_gc_flags() { - assert(!UseSerialGC && !UseParallelGC, "control point invariant"); + assert(!UseSerialGC && !UseParallelGC && !UseG1GC, + "control point invariant"); + assert(UseParNewGC, "Error"); // Turn off AdaptiveSizePolicy by default for parnew until it is // complete. - if (UseParNewGC && - FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { + if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false); } - if (FLAG_IS_DEFAULT(UseParNewGC) && ParallelGCThreads > 1) { - FLAG_SET_DEFAULT(UseParNewGC, true); - } else if (UseParNewGC && ParallelGCThreads == 0) { + if (ParallelGCThreads == 0) { FLAG_SET_DEFAULT(ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) { @@ -969,15 +968,12 @@ // further optimization and tuning efforts, and would almost // certainly gain from analysis of platform and environment. void Arguments::set_cms_and_parnew_gc_flags() { - if (UseSerialGC || UseParallelGC) { - return; - } - + assert(!UseSerialGC && !UseParallelGC, "Error"); assert(UseConcMarkSweepGC, "CMS is expected to be on here"); // If we are using CMS, we prefer to UseParNewGC, // unless explicitly forbidden. - if (!UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) { + if (FLAG_IS_DEFAULT(UseParNewGC)) { FLAG_SET_ERGO(bool, UseParNewGC, true); } @@ -1157,6 +1153,7 @@ // machine class and automatic selection policy. if (!UseSerialGC && !UseConcMarkSweepGC && + !UseG1GC && !UseParNewGC && !DumpSharedSpaces && FLAG_IS_DEFAULT(UseParallelGC)) { @@ -1174,7 +1171,7 @@ // field offset to determine free list chunk markers. // Check that UseCompressedOops can be set with the max heap size allocated // by ergonomics. - if (!UseConcMarkSweepGC && MaxHeapSize <= max_heap_for_compressed_oops()) { + if (!UseG1GC && !UseConcMarkSweepGC && MaxHeapSize <= max_heap_for_compressed_oops()) { if (FLAG_IS_DEFAULT(UseCompressedOops)) { FLAG_SET_ERGO(bool, UseCompressedOops, true); } @@ -1183,6 +1180,8 @@ // If specified, give a warning if (UseConcMarkSweepGC){ warning("Compressed Oops does not work with CMS"); + } else if (UseG1GC) { + warning("Compressed Oops does not work with UseG1GC"); } else { warning( "Max heap size too large for Compressed Oops"); @@ -1196,6 +1195,7 @@ } void Arguments::set_parallel_gc_flags() { + assert(UseParallelGC || UseParallelOldGC, "Error"); // If parallel old was requested, automatically enable parallel scavenge. if (UseParallelOldGC && !UseParallelGC && FLAG_IS_DEFAULT(UseParallelGC)) { FLAG_SET_DEFAULT(UseParallelGC, true); @@ -1207,51 +1207,8 @@ FLAG_SET_ERGO(uintx, ParallelGCThreads, Abstract_VM_Version::parallel_worker_threads()); - if (FLAG_IS_DEFAULT(MaxHeapSize)) { - const uint64_t reasonable_fraction = - os::physical_memory() / DefaultMaxRAMFraction; - const uint64_t maximum_size = (uint64_t) - (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ? - MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) : - DefaultMaxRAM); - size_t reasonable_max = - (size_t) os::allocatable_physical_memory(reasonable_fraction); - if (reasonable_max > maximum_size) { - reasonable_max = maximum_size; - } - if (PrintGCDetails && Verbose) { - // Cannot use gclog_or_tty yet. - tty->print_cr(" Max heap size for server class platform " - SIZE_FORMAT, reasonable_max); - } - // If the initial_heap_size has not been set with -Xms, - // then set it as fraction of size of physical memory - // respecting the maximum and minimum sizes of the heap. 
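The asserts added above encode the control-point invariant that at most one collector selection path is taken once UseG1GC is part of the picture, and the verify_serial_gc_flags change further below rejects UseG1GC as well. A toy version of that kind of mutual-exclusion check, with a stand-in flag struct instead of the real globals:

// Sketch of a collector-flag consistency check; the struct below stands in
// for the real UseSerialGC/UseParallelGC/UseConcMarkSweepGC/UseG1GC globals.
#include <cstdio>

struct GcFlags {
  bool use_serial;
  bool use_parallel;
  bool use_cms;
  bool use_g1;
};

// Returns true when at most one collector has been selected.
static bool collectors_consistent(const GcFlags& f) {
  int selected = (f.use_serial ? 1 : 0) + (f.use_parallel ? 1 : 0) +
                 (f.use_cms ? 1 : 0) + (f.use_g1 ? 1 : 0);
  return selected <= 1;
}

int main() {
  GcFlags g1_only    = {false, false, false, true};
  GcFlags g1_and_cms = {false, false, true,  true};
  std::printf("g1 only: %s, g1+cms: %s\n",
              collectors_consistent(g1_only)    ? "ok" : "inconsistent",
              collectors_consistent(g1_and_cms) ? "ok" : "inconsistent");
  return 0;
}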
- if (initial_heap_size() == 0) { - const uint64_t reasonable_initial_fraction = - os::physical_memory() / DefaultInitialRAMFraction; - const size_t reasonable_initial = - (size_t) os::allocatable_physical_memory(reasonable_initial_fraction); - const size_t minimum_size = NewSize + OldSize; - set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max), - minimum_size)); - // Currently the minimum size and the initial heap sizes are the same. - set_min_heap_size(initial_heap_size()); - if (PrintGCDetails && Verbose) { - // Cannot use gclog_or_tty yet. - tty->print_cr(" Initial heap size for server class platform " - SIZE_FORMAT, initial_heap_size()); - } - } else { - // An minimum size was specified on the command line. Be sure - // that the maximum size is consistent. - if (initial_heap_size() > reasonable_max) { - reasonable_max = initial_heap_size(); - } - } - FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max); - } - + // PS is a server collector, setup the heap sizes accordingly. + set_server_heap_size(); // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the // SurvivorRatio has been set, reset their default values to SurvivorRatio + // 2. By doing this we make SurvivorRatio also work for Parallel Scavenger. @@ -1279,6 +1236,70 @@ } } +void Arguments::set_g1_gc_flags() { + assert(UseG1GC, "Error"); + // G1 is a server collector, setup the heap sizes accordingly. + set_server_heap_size(); +#ifdef COMPILER1 + FastTLABRefill = false; +#endif + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads()); + if (ParallelGCThreads == 0) { + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads +()); + } + no_shared_spaces(); +} + +void Arguments::set_server_heap_size() { + if (FLAG_IS_DEFAULT(MaxHeapSize)) { + const uint64_t reasonable_fraction = + os::physical_memory() / DefaultMaxRAMFraction; + const uint64_t maximum_size = (uint64_t) + (FLAG_IS_DEFAULT(DefaultMaxRAM) && UseCompressedOops ? + MIN2(max_heap_for_compressed_oops(), DefaultMaxRAM) : + DefaultMaxRAM); + size_t reasonable_max = + (size_t) os::allocatable_physical_memory(reasonable_fraction); + if (reasonable_max > maximum_size) { + reasonable_max = maximum_size; + } + if (PrintGCDetails && Verbose) { + // Cannot use gclog_or_tty yet. + tty->print_cr(" Max heap size for server class platform " + SIZE_FORMAT, reasonable_max); + } + // If the initial_heap_size has not been set with -Xms, + // then set it as fraction of size of physical memory + // respecting the maximum and minimum sizes of the heap. + if (initial_heap_size() == 0) { + const uint64_t reasonable_initial_fraction = + os::physical_memory() / DefaultInitialRAMFraction; + const size_t reasonable_initial = + (size_t) os::allocatable_physical_memory(reasonable_initial_fraction); + const size_t minimum_size = NewSize + OldSize; + set_initial_heap_size(MAX2(MIN2(reasonable_initial, reasonable_max), + minimum_size)); + // Currently the minimum size and the initial heap sizes are the same. + set_min_heap_size(initial_heap_size()); + if (PrintGCDetails && Verbose) { + // Cannot use gclog_or_tty yet. + tty->print_cr(" Initial heap size for server class platform " + SIZE_FORMAT, initial_heap_size()); + } + } else { + // A minimum size was specified on the command line. Be sure + // that the maximum size is consistent. 
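set_server_heap_size() above factors the sizing ergonomics out of the parallel-collector path so that G1 gets the same treatment: the maximum heap is a fraction of physical memory capped by a platform limit, and the initial heap is a smaller fraction clamped into that range. A sketch of that arithmetic with example inputs (the fractions and caps below are illustrative, not the real DefaultMaxRAMFraction/DefaultInitialRAMFraction defaults):

// Sketch of server-class heap sizing: max heap from one fraction of RAM
// (capped), initial heap from another fraction (clamped into [minimum, max]).
#include <algorithm>
#include <cstdint>
#include <cstdio>

struct ServerHeapSizes {
  uint64_t max_bytes;
  uint64_t initial_bytes;
};

static ServerHeapSizes server_heap_sizes(uint64_t physical_memory,
                                         uint64_t max_ram_cap,
                                         unsigned max_fraction,
                                         unsigned initial_fraction,
                                         uint64_t minimum_bytes) {
  uint64_t reasonable_max = std::min(physical_memory / max_fraction, max_ram_cap);
  uint64_t reasonable_initial = physical_memory / initial_fraction;
  reasonable_initial = std::max(std::min(reasonable_initial, reasonable_max),
                                minimum_bytes);
  return ServerHeapSizes{reasonable_max, reasonable_initial};
}

int main() {
  const uint64_t GB = 1024ull * 1024 * 1024;
  // 32 GB machine, 1/4 of RAM for the max heap, 1/64 for the initial heap,
  // capped at 8 GB and floored at 64 MB; all of these are example numbers.
  ServerHeapSizes s = server_heap_sizes(32 * GB, 8 * GB, 4, 64, 64 * 1024 * 1024);
  std::printf("max = %llu MB, initial = %llu MB\n",
              (unsigned long long)(s.max_bytes >> 20),
              (unsigned long long)(s.initial_bytes >> 20));  // 8192 MB, 512 MB
  return 0;
}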
+ if (initial_heap_size() > reasonable_max) { + reasonable_max = initial_heap_size(); + } + } + FLAG_SET_ERGO(uintx, MaxHeapSize, (uintx) reasonable_max); + } +} + // This must be called after ergonomics because we want bytecode rewriting // if the server compiler is used, or if UseSharedSpaces is disabled. void Arguments::set_bytecode_flags() { @@ -1362,12 +1383,13 @@ FLAG_SET_DEFAULT(UseConcMarkSweepGC, false); FLAG_SET_DEFAULT(UseParallelGC, false); FLAG_SET_DEFAULT(UseParallelOldGC, false); + FLAG_SET_DEFAULT(UseG1GC, false); } static bool verify_serial_gc_flags() { return (UseSerialGC && - !(UseParNewGC || UseConcMarkSweepGC || UseParallelGC || - UseParallelOldGC)); + !(UseParNewGC || UseConcMarkSweepGC || UseG1GC || + UseParallelGC || UseParallelOldGC)); } // Check consistency of GC selection @@ -1470,8 +1492,8 @@ status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit"); // Check user specified sharing option conflict with Parallel GC - bool cannot_share = (UseConcMarkSweepGC || UseParallelGC || - UseParallelOldGC || UseParNewGC || + bool cannot_share = (UseConcMarkSweepGC || UseG1GC || UseParNewGC || + UseParallelGC || UseParallelOldGC || SOLARIS_ONLY(UseISM) NOT_SOLARIS(UseLargePages)); if (cannot_share) { @@ -1511,11 +1533,6 @@ "The CMS collector (-XX:+UseConcMarkSweepGC) must be " "selected in order\nto use CMSIncrementalMode.\n"); status = false; - } else if (!UseTLAB) { - jio_fprintf(defaultStream::error_stream(), - "error: CMSIncrementalMode requires thread-local " - "allocation buffers\n(-XX:+UseTLAB).\n"); - status = false; } else { status = status && verify_percentage(CMSIncrementalDutyCycle, "CMSIncrementalDutyCycle"); @@ -1535,13 +1552,6 @@ } } - if (UseNUMA && !UseTLAB) { - jio_fprintf(defaultStream::error_stream(), - "error: NUMA allocator (-XX:+UseNUMA) requires thread-local " - "allocation\nbuffers (-XX:+UseTLAB).\n"); - status = false; - } - // CMS space iteration, which FLSVerifyAllHeapreferences entails, // insists that we hold the requisite locks so that the iteration is // MT-safe. For the verification at start-up and shut-down, we don't @@ -2330,10 +2340,15 @@ SOLARIS_ONLY(FLAG_SET_DEFAULT(UseMPSS, false)); SOLARIS_ONLY(FLAG_SET_DEFAULT(UseISM, false)); } + #else if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) { FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1); } + // Temporary disable bulk zeroing reduction with G1. See CR 6627983. + if (UseG1GC) { + FLAG_SET_DEFAULT(ReduceBulkZeroing, false); + } #endif if (!check_vm_args_consistency()) { @@ -2485,12 +2500,29 @@ } } + // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS jint result = parse_vm_init_args(args); if (result != JNI_OK) { return result; } + // These are hacks until G1 is fully supported and tested + // but lets you force -XX:+UseG1GC in PRT and get it where it (mostly) works + if (UseG1GC) { + if (UseConcMarkSweepGC || UseParNewGC || UseParallelGC || UseParallelOldGC || UseSerialGC) { +#ifndef PRODUCT + tty->print_cr("-XX:+UseG1GC is incompatible with other collectors, using UseG1GC"); +#endif // PRODUCT + UseConcMarkSweepGC = false; + UseParNewGC = false; + UseParallelGC = false; + UseParallelOldGC = false; + UseSerialGC = false; + } + no_shared_spaces(); + } + #ifndef PRODUCT if (TraceBytecodesAt != 0) { TraceBytecodes = true; @@ -2536,6 +2568,12 @@ // Set some flags for ParNew set_parnew_gc_flags(); } + // Temporary; make the "if" an "else-if" before + // we integrate G1. 
XXX + if (UseG1GC) { + // Set some flags for garbage-first, if needed. + set_g1_gc_flags(); + } #ifdef SERIALGC assert(verify_serial_gc_flags(), "SerialGC unset"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/arguments.hpp --- a/src/share/vm/runtime/arguments.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/arguments.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -294,10 +294,14 @@ // CMS/ParNew garbage collectors static void set_parnew_gc_flags(); static void set_cms_and_parnew_gc_flags(); - // UseParallelGC + // UseParallel[Old]GC static void set_parallel_gc_flags(); + // Garbage-First (UseG1GC) + static void set_g1_gc_flags(); // GC ergonomics static void set_ergonomics_flags(); + // Setup heap size for a server platform + static void set_server_heap_size(); // Based on automatic selection criteria, should the // low pause collector be used. static bool should_auto_select_low_pause_collector(); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/globals.cpp --- a/src/share/vm/runtime/globals.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/globals.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -174,6 +174,9 @@ static Flag flagTable[] = { RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT, RUNTIME_LP64_PRODUCT_FLAG_STRUCT) RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT) +#ifndef SERIALGC + G1_FLAGS(RUNTIME_DEVELOP_FLAG_STRUCT, RUNTIME_PD_DEVELOP_FLAG_STRUCT, RUNTIME_PRODUCT_FLAG_STRUCT, RUNTIME_PD_PRODUCT_FLAG_STRUCT, RUNTIME_DIAGNOSTIC_FLAG_STRUCT, RUNTIME_NOTPRODUCT_FLAG_STRUCT, RUNTIME_MANAGEABLE_FLAG_STRUCT, RUNTIME_PRODUCT_RW_FLAG_STRUCT) +#endif // SERIALGC #ifdef COMPILER1 C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, C1_PD_DEVELOP_FLAG_STRUCT, C1_PRODUCT_FLAG_STRUCT, C1_PD_PRODUCT_FLAG_STRUCT, C1_NOTPRODUCT_FLAG_STRUCT) #endif diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/globals.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -1106,7 +1106,10 @@ /* gc */ \ \ product(bool, UseSerialGC, false, \ - "Tells whether the VM should use serial garbage collector") \ + "Use the serial garbage collector") \ + \ + product(bool, UseG1GC, false, \ + "Use the Garbage-First garbage collector") \ \ product(bool, UseParallelGC, false, \ "Use the Parallel Scavenge garbage collector") \ @@ -1169,6 +1172,9 @@ develop(bool, TraceChunkTasksQueuing, false, \ "Trace the queuing of the chunk tasks") \ \ + product(uintx, ParallelMarkingThreads, 0, \ + "Number of marking threads concurrent gc will use") \ + \ product(uintx, YoungPLABSize, 4096, \ "Size of young gen promotion labs (in HeapWords)") \ \ @@ -1265,6 +1271,12 @@ "The amount of young gen chosen by default per GC worker " \ "thread available ") \ \ + product(bool, GCOverheadReporting, false, \ + "Enables the GC overhead reporting facility") \ + \ + product(intx, GCOverheadReportingPeriodMS, 100, \ + "Reporting period for conc GC overhead reporting, in ms ") \ + \ product(bool, CMSIncrementalMode, false, \ "Whether CMS GC should operate in \"incremental\" mode") \ \ @@ -1593,6 +1605,9 @@ product(bool, ZeroTLAB, false, \ "Zero out the newly created TLAB") \ \ + 
product(bool, FastTLABRefill, true, \ + "Use fast TLAB refill code") \ + \ product(bool, PrintTLAB, false, \ "Print various TLAB related information") \ \ @@ -2790,6 +2805,12 @@ "how many entries we'll try to leave on the stack during " \ "parallel GC") \ \ + product(intx, DCQBarrierQueueBufferSize, 256, \ + "Number of elements in a dirty card queue buffer") \ + \ + product(intx, DCQBarrierProcessCompletedThreshold, 5, \ + "Number of completed dirty card buffers to trigger processing.") \ + \ /* stack parameters */ \ product_pd(intx, StackYellowPages, \ "Number of yellow zone (recoverable overflows) pages") \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/globals_extension.hpp --- a/src/share/vm/runtime/globals_extension.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/globals_extension.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -74,21 +74,16 @@ #endif typedef enum { - RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, - RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, - RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, - RUNTIME_PRODUCT_RW_FLAG_MEMBER, - RUNTIME_LP64_PRODUCT_FLAG_MEMBER) - RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, - RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, - RUNTIME_NOTPRODUCT_FLAG_MEMBER) + RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER, RUNTIME_LP64_PRODUCT_FLAG_MEMBER) + RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER) +#ifndef KERNEL + G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER, RUNTIME_PD_DEVELOP_FLAG_MEMBER, RUNTIME_PRODUCT_FLAG_MEMBER, RUNTIME_PD_PRODUCT_FLAG_MEMBER, RUNTIME_DIAGNOSTIC_FLAG_MEMBER, RUNTIME_NOTPRODUCT_FLAG_MEMBER, RUNTIME_MANAGEABLE_FLAG_MEMBER, RUNTIME_PRODUCT_RW_FLAG_MEMBER) +#endif #ifdef COMPILER1 - C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, - C1_NOTPRODUCT_FLAG_MEMBER) + C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, C1_NOTPRODUCT_FLAG_MEMBER) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, - C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) + C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) #endif NUM_CommandLineFlag } CommandLineFlag; @@ -144,24 +139,45 @@ #endif typedef enum { - RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, - RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE, RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE, RUNTIME_LP64_PRODUCT_FLAG_MEMBER_WITH_TYPE) 
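// Illustrative sketch (not code from this changeset): the G1_FLAGS hunks here
// splice the new collector's flags into flagTable[] in globals.cpp and into the
// CommandLineFlag/CommandLineFlagWithType enums in globals_extension.hpp using
// the same X-macro pattern as RUNTIME_FLAGS, C1_FLAGS and C2_FLAGS. The minimal
// stand-in below shows how one flag list can be expanded both into variable
// definitions and into an enum of flag ids; the macro and flag names
// (DEMO_FLAGS, DECLARE_FLAG, FLAG_MEMBER, *_demo) are invented for the example
// and are not the actual HotSpot macros.

#include <stddef.h>

// The flag list is written once, parameterized by the expansion macros.
#define DEMO_FLAGS(develop, product)                         \
  product(bool,   UseG1GC_demo,                false)        \
  product(size_t, ParallelMarkingThreads_demo, 0)            \
  develop(bool,   TraceDemoMarking,            false)

// Expansion 1: define the flag variables themselves (cf. globals.cpp).
#define DECLARE_FLAG(type, name, value) type name = value;
DEMO_FLAGS(DECLARE_FLAG, DECLARE_FLAG)

// Expansion 2: build an enum with one member per flag
// (cf. CommandLineFlag in globals_extension.hpp).
#define FLAG_MEMBER(type, name, value) FLAG_MEMBER_##name,
enum DemoCommandLineFlag {
  DEMO_FLAGS(FLAG_MEMBER, FLAG_MEMBER)
  NUM_DemoCommandLineFlag
};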
-RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, - RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, - RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, - RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + RUNTIME_OS_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) +#ifndef KERNEL + G1_FLAGS(RUNTIME_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + RUNTIME_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE, + RUNTIME_MANAGEABLE_FLAG_MEMBER_WITH_TYPE, + RUNTIME_PRODUCT_RW_FLAG_MEMBER_WITH_TYPE) +#endif // KERNEL #ifdef COMPILER1 - C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C1_PRODUCT_FLAG_MEMBER_WITH_TYPE, - C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C1_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C1_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, C2_PRODUCT_FLAG_MEMBER_WITH_TYPE, - C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) + C2_FLAGS(C2_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C2_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, + C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) #endif NUM_CommandLineFlagWithType } CommandLineFlagWithType; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/mutexLocker.cpp --- a/src/share/vm/runtime/mutexLocker.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/mutexLocker.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -47,7 +47,7 @@ Monitor* JNICritical_lock = NULL; Mutex* JvmtiThreadState_lock = NULL; Monitor* JvmtiPendingEvent_lock = NULL; -Mutex* Heap_lock = NULL; +Monitor* Heap_lock = NULL; Mutex* ExpandHeap_lock = NULL; Mutex* AdapterHandlerLibrary_lock = NULL; Mutex* SignatureHandlerLibrary_lock = NULL; @@ -67,7 +67,18 @@ Monitor* SLT_lock = NULL; Monitor* iCMS_lock = NULL; Monitor* FullGCCount_lock = NULL; +Monitor* CMark_lock = NULL; +Monitor* ZF_mon = NULL; +Monitor* Cleanup_mon = NULL; +Monitor* G1ConcRefine_mon = NULL; +Mutex* SATB_Q_FL_lock = NULL; +Monitor* SATB_Q_CBL_mon = NULL; +Mutex* Shared_SATB_Q_lock = NULL; +Mutex* DirtyCardQ_FL_lock = NULL; +Monitor* DirtyCardQ_CBL_mon = NULL; +Mutex* Shared_DirtyCardQ_lock = NULL; Mutex* ParGCRareEvent_lock = NULL; +Mutex* EvacFailureStack_lock = NULL; Mutex* DerivedPointerTableGC_lock = NULL; Mutex* Compile_lock = NULL; Monitor* MethodCompileQueue_lock = NULL; @@ -102,6 +113,9 @@ Mutex* PerfDataManager_lock = NULL; Mutex* OopMapCacheAlloc_lock = NULL; +Mutex* MMUTracker_lock = NULL; +Mutex* HotCardCache_lock = NULL; + Monitor* GCTaskManager_lock = NULL; Mutex* Management_lock = NULL; @@ -150,6 +164,23 @@ def(iCMS_lock , Monitor, special, true ); // CMS incremental mode start/stop notification def(FullGCCount_lock , Monitor, leaf, true ); // in support of ExplicitGCInvokesConcurrent } + if (UseG1GC) { + def(CMark_lock , Monitor, nonleaf, true ); // coordinate concurrent 
mark thread + def(ZF_mon , Monitor, leaf, true ); + def(Cleanup_mon , Monitor, nonleaf, true ); + def(G1ConcRefine_mon , Monitor, nonleaf, true ); + def(SATB_Q_FL_lock , Mutex , special, true ); + def(SATB_Q_CBL_mon , Monitor, nonleaf, true ); + def(Shared_SATB_Q_lock , Mutex, nonleaf, true ); + + def(DirtyCardQ_FL_lock , Mutex , special, true ); + def(DirtyCardQ_CBL_mon , Monitor, nonleaf, true ); + def(Shared_DirtyCardQ_lock , Mutex, nonleaf, true ); + + def(MMUTracker_lock , Mutex , leaf , true ); + def(HotCardCache_lock , Mutex , special , true ); + def(EvacFailureStack_lock , Mutex , nonleaf , true ); + } def(ParGCRareEvent_lock , Mutex , leaf , true ); def(DerivedPointerTableGC_lock , Mutex, leaf, true ); def(CodeCache_lock , Mutex , special, true ); @@ -203,7 +234,7 @@ def(SLT_lock , Monitor, nonleaf, false ); // used in CMS GC for locking PLL lock } - def(Heap_lock , Mutex , nonleaf+1, false); + def(Heap_lock , Monitor, nonleaf+1, false); def(JfieldIdCreation_lock , Mutex , nonleaf+1, true ); // jfieldID, Used in VM_Operation def(JNICachedItableIndex_lock , Mutex , nonleaf+1, false); // Used to cache an itable index during JNI invoke diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/mutexLocker.hpp --- a/src/share/vm/runtime/mutexLocker.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/mutexLocker.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -38,7 +38,7 @@ extern Monitor* JNICritical_lock; // a lock used while entering and exiting JNI critical regions, allows GC to sometimes get in extern Mutex* JvmtiThreadState_lock; // a lock on modification of JVMTI thread data extern Monitor* JvmtiPendingEvent_lock; // a lock on the JVMTI pending events list -extern Mutex* Heap_lock; // a lock on the heap +extern Monitor* Heap_lock; // a lock on the heap extern Mutex* ExpandHeap_lock; // a lock on expanding the heap extern Mutex* AdapterHandlerLibrary_lock; // a lock on the AdapterHandlerLibrary extern Mutex* SignatureHandlerLibrary_lock; // a lock on the SignatureHandlerLibrary @@ -60,8 +60,30 @@ extern Monitor* SLT_lock; // used in CMS GC for acquiring PLL extern Monitor* iCMS_lock; // CMS incremental mode start/stop notification extern Monitor* FullGCCount_lock; // in support of "concurrent" full gc +extern Monitor* CMark_lock; // used for concurrent mark thread coordination +extern Monitor* ZF_mon; // used for G1 conc zero-fill. +extern Monitor* Cleanup_mon; // used for G1 conc cleanup. +extern Monitor* G1ConcRefine_mon; // used for G1 conc-refine + // coordination. + +extern Mutex* SATB_Q_FL_lock; // Protects SATB Q + // buffer free list. +extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q + // completed buffer queue. +extern Mutex* Shared_SATB_Q_lock; // Lock protecting SATB + // queue shared by + // non-Java threads. + +extern Mutex* DirtyCardQ_FL_lock; // Protects dirty card Q + // buffer free list. +extern Monitor* DirtyCardQ_CBL_mon; // Protects dirty card Q + // completed buffer queue. +extern Mutex* Shared_DirtyCardQ_lock; // Lock protecting dirty card + // queue shared by + // non-Java threads. // (see option ExplicitGCInvokesConcurrent) extern Mutex* ParGCRareEvent_lock; // Synchronizes various (rare) parallel GC ops. 
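// Illustrative sketch (not code from this changeset): SATB_Q_CBL_mon and
// DirtyCardQ_CBL_mon, defined above, guard lists of "completed" buffers handed
// from mutator threads to the concurrent G1 threads. The hedged example below
// shows the intended handshake using the existing HotSpot MutexLockerEx and
// Monitor primitives; push_completed_buffer(), have_completed_buffer() and
// pop_completed_buffer() are hypothetical placeholders for the queue-set
// bookkeeping, not functions from this patch.

// Producer side: a Java thread retires a full SATB buffer.
void example_enqueue_completed_buffer(void* buf) {
  MutexLockerEx x(SATB_Q_CBL_mon, Mutex::_no_safepoint_check_flag);
  push_completed_buffer(buf);      // hypothetical: append to the shared list
  SATB_Q_CBL_mon->notify_all();    // wake the concurrent mark thread
}

// Consumer side: the concurrent mark thread blocks until work arrives.
void* example_take_completed_buffer() {
  MutexLockerEx x(SATB_Q_CBL_mon, Mutex::_no_safepoint_check_flag);
  while (!have_completed_buffer()) {                   // hypothetical predicate
    SATB_Q_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
  }
  return pop_completed_buffer();   // hypothetical: detach one buffer
}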
+extern Mutex* EvacFailureStack_lock; // guards the evac failure scan stack extern Mutex* Compile_lock; // a lock held when Compilation is updating code (used to block CodeCache traversal, CHA updates, etc) extern Monitor* MethodCompileQueue_lock; // a lock held when method compilations are enqueued, dequeued #ifdef TIERED @@ -93,6 +115,10 @@ extern Mutex* ParkerFreeList_lock; extern Mutex* OopMapCacheAlloc_lock; // protects allocation of oop_map caches +extern Mutex* MMUTracker_lock; // protects the MMU + // tracker data structures +extern Mutex* HotCardCache_lock; // protects the hot card cache + extern Mutex* Management_lock; // a lock used to serialize JVM management extern Monitor* LowMemory_lock; // a lock used for low memory detection diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/os.hpp --- a/src/share/vm/runtime/os.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/os.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -105,6 +105,18 @@ static jlong elapsed_counter(); static jlong elapsed_frequency(); + // The "virtual time" of a thread is the amount of time a thread has + // actually run. The first function indicates whether the OS supports + // this functionality for the current thread, and if so: + // * the second enables vtime tracking (if that is required). + // * the third tells whether vtime is enabled. + // * the fourth returns the elapsed virtual time for the current + // thread. + static bool supports_vtime(); + static bool enable_vtime(); + static bool vtime_enabled(); + static double elapsedVTime(); + // Return current local time in a string (YYYY-MM-DD HH:MM:SS). // It is MT safe, but not async-safe, as reading time zone // information may require a lock on some platforms. diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/sharedRuntime.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -111,6 +111,25 @@ } #endif // PRODUCT +#ifndef SERIALGC + +// G1 write-barrier pre: executed before a pointer store. +JRT_LEAF(void, SharedRuntime::g1_wb_pre(oopDesc* orig, JavaThread *thread)) + if (orig == NULL) { + assert(false, "should be optimized out"); + return; + } + // store the original value that was in the field reference + thread->satb_mark_queue().enqueue(orig); +JRT_END + +// G1 write-barrier post: executed after a pointer store. 
+JRT_LEAF(void, SharedRuntime::g1_wb_post(void* card_addr, JavaThread* thread)) + thread->dirty_card_queue().enqueue(card_addr); +JRT_END + +#endif // !SERIALGC + JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x)) return x * y; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/sharedRuntime.hpp --- a/src/share/vm/runtime/sharedRuntime.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/sharedRuntime.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -99,6 +99,12 @@ static address raw_exception_handler_for_return_address(address return_address); static address exception_handler_for_return_address(address return_address); +#ifndef SERIALGC + // G1 write barriers + static void g1_wb_pre(oopDesc* orig, JavaThread *thread); + static void g1_wb_post(void* card_addr, JavaThread* thread); +#endif // !SERIALGC + // exception handling and implicit exceptions static address compute_compiled_exc_handler(nmethod* nm, address ret_pc, Handle& exception, bool force_unwind, bool top_frame_only); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/task.cpp --- a/src/share/vm/runtime/task.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/task.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -67,7 +67,6 @@ PeriodicTask::PeriodicTask(size_t interval_time) : _counter(0), _interval(interval_time) { - assert(is_init_completed(), "Periodic tasks should not start during VM initialization"); // Sanity check the interval time assert(_interval >= PeriodicTask::min_interval && _interval <= PeriodicTask::max_interval && diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/thread.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1138,6 +1138,10 @@ void JavaThread::initialize() { // Initialize fields + + // Set the claimed par_id to -1 (ie not claiming any par_ids) + set_claimed_par_id(-1); + set_saved_exception_pc(NULL); set_threadObj(NULL); _anchor.clear(); @@ -1209,7 +1213,18 @@ pd_initialize(); } -JavaThread::JavaThread(bool is_attaching) : Thread() { +#ifndef SERIALGC +SATBMarkQueueSet JavaThread::_satb_mark_queue_set; +DirtyCardQueueSet JavaThread::_dirty_card_queue_set; +#endif // !SERIALGC + +JavaThread::JavaThread(bool is_attaching) : + Thread() +#ifndef SERIALGC + , _satb_mark_queue(&_satb_mark_queue_set), + _dirty_card_queue(&_dirty_card_queue_set) +#endif // !SERIALGC +{ initialize(); _is_attaching = is_attaching; } @@ -1255,7 +1270,13 @@ // Remove this ifdef when C1 is ported to the compiler interface. static void compiler_thread_entry(JavaThread* thread, TRAPS); -JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : Thread() { +JavaThread::JavaThread(ThreadFunction entry_point, size_t stack_sz) : + Thread() +#ifndef SERIALGC + , _satb_mark_queue(&_satb_mark_queue_set), + _dirty_card_queue(&_dirty_card_queue_set) +#endif // !SERIALGC +{ if (TraceThreadEvents) { tty->print_cr("creating thread %p", this); } @@ -3034,9 +3055,14 @@ #ifndef SERIALGC // Support for ConcurrentMarkSweep. This should be cleaned up - // and better encapsulated. XXX YSR - if (UseConcMarkSweepGC) { - ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD); + // and better encapsulated. The ugly nested if test would go away + // once things are properly refactored. 
XXX YSR + if (UseConcMarkSweepGC || UseG1GC) { + if (UseConcMarkSweepGC) { + ConcurrentMarkSweepThread::makeSurrogateLockerThread(THREAD); + } else { + ConcurrentMarkThread::makeSurrogateLockerThread(THREAD); + } if (HAS_PENDING_EXCEPTION) { vm_exit_during_initialization(Handle(THREAD, PENDING_EXCEPTION)); } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/thread.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -783,6 +783,18 @@ } _jmp_ring[ jump_ring_buffer_size ]; #endif /* PRODUCT */ +#ifndef SERIALGC + // Support for G1 barriers + + ObjPtrQueue _satb_mark_queue; // Thread-local log for SATB barrier. + // Set of all such queues. + static SATBMarkQueueSet _satb_mark_queue_set; + + DirtyCardQueue _dirty_card_queue; // Thread-local log for dirty cards. + // Set of all such queues. + static DirtyCardQueueSet _dirty_card_queue_set; +#endif // !SERIALGC + friend class VMThread; friend class ThreadWaitTransition; friend class VM_Exit; @@ -1168,6 +1180,11 @@ static ByteSize do_not_unlock_if_synchronized_offset() { return byte_offset_of(JavaThread, _do_not_unlock_if_synchronized); } +#ifndef SERIALGC + static ByteSize satb_mark_queue_offset() { return byte_offset_of(JavaThread, _satb_mark_queue); } + static ByteSize dirty_card_queue_offset() { return byte_offset_of(JavaThread, _dirty_card_queue); } +#endif // !SERIALGC + // Returns the jni environment for this thread JNIEnv* jni_environment() { return &_jni_environment; } @@ -1414,6 +1431,20 @@ _stack_size_at_create = value; } +#ifndef SERIALGC + // SATB marking queue support + ObjPtrQueue& satb_mark_queue() { return _satb_mark_queue; } + static SATBMarkQueueSet& satb_mark_queue_set() { + return _satb_mark_queue_set; + } + + // Dirty card queue support + DirtyCardQueue& dirty_card_queue() { return _dirty_card_queue; } + static DirtyCardQueueSet& dirty_card_queue_set() { + return _dirty_card_queue_set; + } +#endif // !SERIALGC + // Machine dependent stuff #include "incls/_thread_pd.hpp.incl" @@ -1445,6 +1476,14 @@ // clearing/querying jni attach status bool is_attaching() const { return _is_attaching; } void set_attached() { _is_attaching = false; OrderAccess::fence(); } +private: + // This field is used to determine if a thread has claimed + // a par_id: it is -1 if the thread has not claimed a par_id; + // otherwise its value is the par_id that has been claimed. 
+ int _claimed_par_id; +public: + int get_claimed_par_id() { return _claimed_par_id; } + void set_claimed_par_id(int id) { _claimed_par_id = id;} }; // Inline implementation of JavaThread::current diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/virtualspace.cpp --- a/src/share/vm/runtime/virtualspace.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/virtualspace.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -235,24 +235,16 @@ // increase size to a multiple of the desired alignment size = align_size_up(size, alignment); size_t extra_size = size + alignment; - char* extra_base = os::reserve_memory(extra_size, NULL, alignment); - if (extra_base == NULL) return; - // Do manual alignement - base = (char*) align_size_up((uintptr_t) extra_base, alignment); - assert(base >= extra_base, "just checking"); - // Release unused areas - size_t unused_bottom_size = base - extra_base; - size_t unused_top_size = extra_size - size - unused_bottom_size; - assert(unused_bottom_size % os::vm_allocation_granularity() == 0, - "size not allocation aligned"); - assert(unused_top_size % os::vm_allocation_granularity() == 0, - "size not allocation aligned"); - if (unused_bottom_size > 0) { - os::release_memory(extra_base, unused_bottom_size); - } - if (unused_top_size > 0) { - os::release_memory(base + size, unused_top_size); - } + do { + char* extra_base = os::reserve_memory(extra_size, NULL, alignment); + if (extra_base == NULL) return; + // Do manual alignement + base = (char*) align_size_up((uintptr_t) extra_base, alignment); + assert(base >= extra_base, "just checking"); + // Re-reserve the region at the aligned base address. + os::release_memory(extra_base, extra_size); + base = os::reserve_memory(size, base); + } while (base == NULL); } } // Done diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/vmStructs.cpp --- a/src/share/vm/runtime/vmStructs.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/vmStructs.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -302,7 +302,7 @@ nonstatic_field(CardTableModRefBS, _guard_region, MemRegion) \ nonstatic_field(CardTableModRefBS, byte_map_base, jbyte*) \ \ - nonstatic_field(CardTableRS, _ct_bs, CardTableModRefBS) \ + nonstatic_field(CardTableRS, _ct_bs, CardTableModRefBSForCTRS*) \ \ nonstatic_field(CollectedHeap, _reserved, MemRegion) \ nonstatic_field(SharedHeap, _perm_gen, PermGen*) \ @@ -993,6 +993,7 @@ declare_toplevel_type(BarrierSet) \ declare_type(ModRefBarrierSet, BarrierSet) \ declare_type(CardTableModRefBS, ModRefBarrierSet) \ + declare_type(CardTableModRefBSForCTRS, CardTableModRefBS) \ declare_toplevel_type(GenRemSet) \ declare_type(CardTableRS, GenRemSet) \ declare_toplevel_type(BlockOffsetSharedArray) \ @@ -1020,6 +1021,10 @@ declare_toplevel_type(BlockOffsetSharedArray*) \ declare_toplevel_type(GenRemSet*) \ declare_toplevel_type(CardTableRS*) \ + declare_toplevel_type(CardTableModRefBS*) \ + declare_toplevel_type(CardTableModRefBS**) \ + declare_toplevel_type(CardTableModRefBSForCTRS*) \ + declare_toplevel_type(CardTableModRefBSForCTRS**) \ declare_toplevel_type(CollectedHeap*) \ declare_toplevel_type(ContiguousSpace*) \ declare_toplevel_type(DefNewGeneration*) \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/runtime/vm_operations.hpp --- a/src/share/vm/runtime/vm_operations.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/runtime/vm_operations.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -53,8 +53,13 @@ template(ParallelGCFailedAllocation) \ template(ParallelGCFailedPermanentAllocation) \ template(ParallelGCSystemGC) \ + 
template(CGC_Operation) \ template(CMS_Initial_Mark) \ template(CMS_Final_Remark) \ + template(G1CollectFull) \ + template(G1CollectForAllocation) \ + template(G1IncCollectionPause) \ + template(G1PopRegionCollectionPause) \ template(EnableBiasedLocking) \ template(RevokeBias) \ template(BulkRevokeBias) \ diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/services/heapDumper.cpp --- a/src/share/vm/services/heapDumper.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/services/heapDumper.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -1578,17 +1578,8 @@ } // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1 - size_t used; + size_t used = ch->used(); const char* header; -#ifndef SERIALGC - if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { - used = GenCollectedHeap::heap()->used(); - } else { - used = ParallelScavengeHeap::heap()->used(); - } -#else // SERIALGC - used = GenCollectedHeap::heap()->used(); -#endif // SERIALGC if (used > (size_t)SegmentedHeapDumpThreshold) { set_segmented_dump(); header = "JAVA PROFILE 1.0.2"; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/services/memoryService.cpp --- a/src/share/vm/services/memoryService.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/services/memoryService.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -59,9 +59,13 @@ add_parallel_scavenge_heap_info(ParallelScavengeHeap::heap()); break; } + case CollectedHeap::G1CollectedHeap : { + G1CollectedHeap::g1_unimplemented(); + return; + } #endif // SERIALGC default: { - guarantee(false, "Not recognized kind of heap"); + guarantee(false, "Unrecognized kind of heap"); } } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/bitMap.cpp --- a/src/share/vm/utilities/bitMap.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/bitMap.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -26,54 +26,59 @@ # include "incls/_bitMap.cpp.incl" -BitMap::BitMap(idx_t* map, idx_t size_in_bits) { +BitMap::BitMap(bm_word_t* map, idx_t size_in_bits) : + _map(map), _size(size_in_bits) +{ + assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption."); assert(size_in_bits >= 0, "just checking"); - _map = map; - _size = size_in_bits; } -BitMap::BitMap(idx_t size_in_bits) { - assert(size_in_bits >= 0, "just checking"); - _size = size_in_bits; - _map = NEW_RESOURCE_ARRAY(idx_t, size_in_words()); +BitMap::BitMap(idx_t size_in_bits, bool in_resource_area) : + _map(NULL), _size(0) +{ + assert(sizeof(bm_word_t) == BytesPerWord, "Implementation assumption."); + resize(size_in_bits, in_resource_area); } -void BitMap::resize(idx_t size_in_bits) { +void BitMap::verify_index(idx_t index) const { + assert(index < _size, "BitMap index out of bounds"); +} + +void BitMap::verify_range(idx_t beg_index, idx_t end_index) const { +#ifdef ASSERT + assert(beg_index <= end_index, "BitMap range error"); + // Note that [0,0) and [size,size) are both valid ranges. 
+ if (end_index != _size) verify_index(end_index); +#endif +} + +void BitMap::resize(idx_t size_in_bits, bool in_resource_area) { assert(size_in_bits >= 0, "just checking"); - size_t old_size_in_words = size_in_words(); - uintptr_t* old_map = map(); + idx_t old_size_in_words = size_in_words(); + bm_word_t* old_map = map(); + _size = size_in_bits; - size_t new_size_in_words = size_in_words(); - _map = NEW_RESOURCE_ARRAY(idx_t, new_size_in_words); - Copy::disjoint_words((HeapWord*) old_map, (HeapWord*) _map, MIN2(old_size_in_words, new_size_in_words)); + idx_t new_size_in_words = size_in_words(); + if (in_resource_area) { + _map = NEW_RESOURCE_ARRAY(bm_word_t, new_size_in_words); + } else { + if (old_map != NULL) FREE_C_HEAP_ARRAY(bm_word_t, _map); + _map = NEW_C_HEAP_ARRAY(bm_word_t, new_size_in_words); + } + Copy::disjoint_words((HeapWord*)old_map, (HeapWord*) _map, + MIN2(old_size_in_words, new_size_in_words)); if (new_size_in_words > old_size_in_words) { clear_range_of_words(old_size_in_words, size_in_words()); } } -// Returns a bit mask for a range of bits [beg, end) within a single word. Each -// bit in the mask is 0 if the bit is in the range, 1 if not in the range. The -// returned mask can be used directly to clear the range, or inverted to set the -// range. Note: end must not be 0. -inline BitMap::idx_t -BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const { - assert(end != 0, "does not work when end == 0"); - assert(beg == end || word_index(beg) == word_index(end - 1), - "must be a single-word range"); - idx_t mask = bit_mask(beg) - 1; // low (right) bits - if (bit_in_word(end) != 0) { - mask |= ~(bit_mask(end) - 1); // high (left) bits - } - return mask; -} - void BitMap::set_range_within_word(idx_t beg, idx_t end) { // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. if (beg != end) { - idx_t mask = inverted_bit_mask_for_range(beg, end); + bm_word_t mask = inverted_bit_mask_for_range(beg, end); *word_addr(beg) |= ~mask; } } @@ -82,7 +87,7 @@ // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. if (beg != end) { - idx_t mask = inverted_bit_mask_for_range(beg, end); + bm_word_t mask = inverted_bit_mask_for_range(beg, end); *word_addr(beg) &= mask; } } @@ -105,20 +110,6 @@ } } -inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) { - memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t)); -} - -inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) { - memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t)); -} - -inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const { - idx_t bit_rounded_up = bit + (BitsPerWord - 1); - // Check for integer arithmetic overflow. - return bit_rounded_up > bit ? word_index(bit_rounded_up) : size_in_words(); -} - void BitMap::set_range(idx_t beg, idx_t end) { verify_range(beg, end); @@ -187,6 +178,64 @@ clear_range_within_word(bit_index(end_full_word), end); } +void BitMap::mostly_disjoint_range_union(BitMap* from_bitmap, + idx_t from_start_index, + idx_t to_start_index, + size_t word_num) { + // Ensure that the parameters are correct. + // These shouldn't be that expensive to check, hence I left them as + // guarantees. 
+ guarantee(from_bitmap->bit_in_word(from_start_index) == 0, + "it should be aligned on a word boundary"); + guarantee(bit_in_word(to_start_index) == 0, + "it should be aligned on a word boundary"); + guarantee(word_num >= 2, "word_num should be at least 2"); + + intptr_t* from = (intptr_t*) from_bitmap->word_addr(from_start_index); + intptr_t* to = (intptr_t*) word_addr(to_start_index); + + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + while (true) { + intptr_t old_value = *to; + intptr_t new_value = old_value | *from; + intptr_t res = Atomic::cmpxchg_ptr(new_value, to, old_value); + if (res == old_value) break; + } + } + ++from; + ++to; + + for (size_t i = 0; i < word_num - 2; ++i) { + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + assert(*to == 0, "nobody else should be writing here"); + intptr_t new_value = *from; + *to = new_value; + } + + ++from; + ++to; + } + + if (*from != 0) { + // if it's 0, then there's no point in doing the CAS + while (true) { + intptr_t old_value = *to; + intptr_t new_value = old_value | *from; + intptr_t res = Atomic::cmpxchg_ptr(new_value, to, old_value); + if (res == old_value) break; + } + } + + // the -1 is because we didn't advance them after the final CAS + assert(from == + (intptr_t*) from_bitmap->word_addr(from_start_index) + word_num - 1, + "invariant"); + assert(to == (intptr_t*) word_addr(to_start_index) + word_num - 1, + "invariant"); +} + void BitMap::at_put(idx_t offset, bool value) { if (value) { set_bit(offset); @@ -282,11 +331,11 @@ bool BitMap::contains(const BitMap other) const { assert(size() == other.size(), "must have same size"); - uintptr_t* dest_map = map(); - uintptr_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { - uintptr_t word_union = dest_map[index] | other_map[index]; + bm_word_t word_union = dest_map[index] | other_map[index]; // If this has more bits set than dest_map[index], then other is not a // subset. 
if (word_union != dest_map[index]) return false; @@ -296,8 +345,8 @@ bool BitMap::intersects(const BitMap other) const { assert(size() == other.size(), "must have same size"); - uintptr_t* dest_map = map(); - uintptr_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { if ((dest_map[index] & other_map[index]) != 0) return true; @@ -308,8 +357,8 @@ void BitMap::set_union(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { dest_map[index] = dest_map[index] | other_map[index]; @@ -319,8 +368,8 @@ void BitMap::set_difference(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size_in_words(); index++) { dest_map[index] = dest_map[index] & ~(other_map[index]); @@ -330,8 +379,8 @@ void BitMap::set_intersection(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { dest_map[index] = dest_map[index] & other_map[index]; @@ -339,11 +388,26 @@ } +void BitMap::set_intersection_at_offset(BitMap other, idx_t offset) { + assert(other.size() >= offset, "offset not in range"); + assert(other.size() - offset >= size(), "other not large enough"); + // XXX Ideally, we would remove this restriction. 
+ guarantee((offset % (sizeof(bm_word_t) * BitsPerByte)) == 0, + "Only handle aligned cases so far."); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); + idx_t offset_word_ind = word_index(offset); + idx_t size = size_in_words(); + for (idx_t index = 0; index < size; index++) { + dest_map[index] = dest_map[index] & other_map[offset_word_ind + index]; + } +} + bool BitMap::set_union_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { idx_t temp = map(index) | other_map[index]; @@ -357,11 +421,11 @@ bool BitMap::set_difference_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { - idx_t temp = dest_map[index] & ~(other_map[index]); + bm_word_t temp = dest_map[index] & ~(other_map[index]); changed = changed || (temp != dest_map[index]); dest_map[index] = temp; } @@ -372,12 +436,12 @@ bool BitMap::set_intersection_with_result(BitMap other) { assert(size() == other.size(), "must have same size"); bool changed = false; - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { - idx_t orig = dest_map[index]; - idx_t temp = orig & other_map[index]; + bm_word_t orig = dest_map[index]; + bm_word_t temp = orig & other_map[index]; changed = changed || (temp != orig); dest_map[index] = temp; } @@ -387,8 +451,8 @@ void BitMap::set_from(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { dest_map[index] = other_map[index]; @@ -398,8 +462,8 @@ bool BitMap::is_same(BitMap other) { assert(size() == other.size(), "must have same size"); - idx_t* dest_map = map(); - idx_t* other_map = other.map(); + bm_word_t* dest_map = map(); + bm_word_t* other_map = other.map(); idx_t size = size_in_words(); for (idx_t index = 0; index < size; index++) { if (dest_map[index] != other_map[index]) return false; @@ -408,24 +472,24 @@ } bool BitMap::is_full() const { - uintptr_t* word = map(); + bm_word_t* word = map(); idx_t rest = size(); for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) { - if (*word != (uintptr_t) AllBits) return false; + if (*word != (bm_word_t) AllBits) return false; word++; } - return rest == 0 || (*word | ~right_n_bits((int)rest)) == (uintptr_t) AllBits; + return rest == 0 || (*word | ~right_n_bits((int)rest)) == (bm_word_t) AllBits; } bool BitMap::is_empty() const { - uintptr_t* word = map(); + bm_word_t* word = map(); idx_t rest = size(); for (; rest >= (idx_t) BitsPerWord; rest -= BitsPerWord) { - if (*word != (uintptr_t) NoBits) return false; + if (*word != (bm_word_t) NoBits) return false; word++; } - return rest == 0 || (*word & right_n_bits((int)rest)) == (uintptr_t) NoBits; + return rest == 0 || (*word & right_n_bits((int)rest)) == (bm_word_t) NoBits; } void BitMap::clear_large() { @@ -436,7 
+500,7 @@ // then modifications in and to the left of the _bit_ being // currently sampled will not be seen. Note also that the // interval [leftOffset, rightOffset) is right open. -void BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { +bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { verify_range(leftOffset, rightOffset); idx_t startIndex = word_index(leftOffset); @@ -445,106 +509,71 @@ offset < rightOffset && index < endIndex; offset = (++index) << LogBitsPerWord) { idx_t rest = map(index) >> (offset & (BitsPerWord - 1)); - for (; offset < rightOffset && rest != (uintptr_t)NoBits; offset++) { + for (; offset < rightOffset && rest != (bm_word_t)NoBits; offset++) { if (rest & 1) { - blk->do_bit(offset); + if (!blk->do_bit(offset)) return false; // resample at each closure application // (see, for instance, CMS bug 4525989) rest = map(index) >> (offset & (BitsPerWord -1)); - // XXX debugging: remove - // The following assertion assumes that closure application - // doesn't clear bits (may not be true in general, e.g. G1). - assert(rest & 1, - "incorrect shift or closure application can clear bits?"); } rest = rest >> 1; } } + return true; +} + +BitMap::idx_t* BitMap::_pop_count_table = NULL; + +void BitMap::init_pop_count_table() { + if (_pop_count_table == NULL) { + BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256); + for (uint i = 0; i < 256; i++) { + table[i] = num_set_bits(i); + } + + intptr_t res = Atomic::cmpxchg_ptr((intptr_t) table, + (intptr_t*) &_pop_count_table, + (intptr_t) NULL_WORD); + if (res != NULL_WORD) { + guarantee( _pop_count_table == (void*) res, "invariant" ); + FREE_C_HEAP_ARRAY(bm_word_t, table); + } + } } -BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset, - idx_t r_offset) const { - assert(l_offset <= size(), "BitMap index out of bounds"); - assert(r_offset <= size(), "BitMap index out of bounds"); - assert(l_offset <= r_offset, "l_offset > r_offset ?"); - - if (l_offset == r_offset) { - return l_offset; - } - idx_t index = word_index(l_offset); - idx_t r_index = word_index(r_offset-1) + 1; - idx_t res_offset = l_offset; +BitMap::idx_t BitMap::num_set_bits(bm_word_t w) { + idx_t bits = 0; - // check bits including and to the _left_ of offset's position - idx_t pos = bit_in_word(res_offset); - idx_t res = map(index) >> pos; - if (res != (uintptr_t)NoBits) { - // find the position of the 1-bit - for (; !(res & 1); res_offset++) { - res = res >> 1; + while (w != 0) { + while ((w & 1) == 0) { + w >>= 1; } - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); + bits++; + w >>= 1; } - // skip over all word length 0-bit runs - for (index++; index < r_index; index++) { - res = map(index); - if (res != (uintptr_t)NoBits) { - // found a 1, return the offset - for (res_offset = index << LogBitsPerWord; !(res & 1); - res_offset++) { - res = res >> 1; - } - assert(res & 1, "tautology; see loop condition"); - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); - } - } - return r_offset; + return bits; } -BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset, - idx_t r_offset) const { - assert(l_offset <= size(), "BitMap index out of bounds"); - assert(r_offset <= size(), "BitMap index out of bounds"); - assert(l_offset <= r_offset, "l_offset > r_offset ?"); - - if (l_offset == r_offset) { - return l_offset; - } - idx_t index = word_index(l_offset); - idx_t r_index = word_index(r_offset-1) + 1; - idx_t res_offset = l_offset; - - // check bits 
including and to the _left_ of offset's position - idx_t pos = res_offset & (BitsPerWord - 1); - idx_t res = (map(index) >> pos) | left_n_bits((int)pos); +BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) { + assert(_pop_count_table != NULL, "precondition"); + return _pop_count_table[c]; +} - if (res != (uintptr_t)AllBits) { - // find the position of the 0-bit - for (; res & 1; res_offset++) { - res = res >> 1; - } - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); - } - // skip over all word length 1-bit runs - for (index++; index < r_index; index++) { - res = map(index); - if (res != (uintptr_t)AllBits) { - // found a 0, return the offset - for (res_offset = index << LogBitsPerWord; res & 1; - res_offset++) { - res = res >> 1; - } - assert(!(res & 1), "tautology; see loop condition"); - assert(res_offset >= l_offset, "just checking"); - return MIN2(res_offset, r_offset); +BitMap::idx_t BitMap::count_one_bits() const { + init_pop_count_table(); // If necessary. + idx_t sum = 0; + typedef unsigned char uchar; + for (idx_t i = 0; i < size_in_words(); i++) { + bm_word_t w = map()[i]; + for (size_t j = 0; j < sizeof(bm_word_t); j++) { + sum += num_set_bits_from_table(uchar(w & 255)); + w >>= 8; } } - return r_offset; + return sum; } + #ifndef PRODUCT void BitMap::print_on(outputStream* st) const { @@ -558,7 +587,7 @@ #endif -BitMap2D::BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot) +BitMap2D::BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot) : _bits_per_slot(bits_per_slot) , _map(map, size_in_slots * bits_per_slot) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/bitMap.hpp --- a/src/share/vm/utilities/bitMap.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/bitMap.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -22,25 +22,19 @@ * */ -// Closure for iterating over BitMaps +// Forward decl; +class BitMapClosure; -class BitMapClosure VALUE_OBJ_CLASS_SPEC { - public: - // Callback when bit in map is set - virtual void do_bit(size_t offset) = 0; -}; - - -// Operations for bitmaps represented as arrays of unsigned 32- or 64-bit -// integers (uintptr_t). -// -// Bit offsets are numbered from 0 to size-1 +// Operations for bitmaps represented as arrays of unsigned integers. +// Bit offsets are numbered from 0 to size-1. class BitMap VALUE_OBJ_CLASS_SPEC { friend class BitMap2D; public: typedef size_t idx_t; // Type used for bit and word indices. + typedef uintptr_t bm_word_t; // Element type of array that represents + // the bitmap. // Hints for range sizes. typedef enum { @@ -48,8 +42,8 @@ } RangeSizeHint; private: - idx_t* _map; // First word in bitmap - idx_t _size; // Size of bitmap (in bits) + bm_word_t* _map; // First word in bitmap + idx_t _size; // Size of bitmap (in bits) // Puts the given value at the given offset, using resize() to size // the bitmap appropriately if needed using factor-of-two expansion. @@ -62,7 +56,7 @@ // Return a mask that will select the specified bit, when applied to the word // containing the bit. - static idx_t bit_mask(idx_t bit) { return (idx_t)1 << bit_in_word(bit); } + static bm_word_t bit_mask(idx_t bit) { return (bm_word_t)1 << bit_in_word(bit); } // Return the index of the word containing the specified bit. static idx_t word_index(idx_t bit) { return bit >> LogBitsPerWord; } @@ -71,66 +65,68 @@ static idx_t bit_index(idx_t word) { return word << LogBitsPerWord; } // Return the array of bitmap words, or a specific word from it. 
- idx_t* map() const { return _map; } - idx_t map(idx_t word) const { return _map[word]; } + bm_word_t* map() const { return _map; } + bm_word_t map(idx_t word) const { return _map[word]; } // Return a pointer to the word containing the specified bit. - idx_t* word_addr(idx_t bit) const { return map() + word_index(bit); } + bm_word_t* word_addr(idx_t bit) const { return map() + word_index(bit); } // Set a word to a specified value or to all ones; clear a word. - void set_word (idx_t word, idx_t val) { _map[word] = val; } + void set_word (idx_t word, bm_word_t val) { _map[word] = val; } void set_word (idx_t word) { set_word(word, ~(uintptr_t)0); } void clear_word(idx_t word) { _map[word] = 0; } // Utilities for ranges of bits. Ranges are half-open [beg, end). // Ranges within a single word. - inline idx_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const; - inline void set_range_within_word (idx_t beg, idx_t end); - inline void clear_range_within_word (idx_t beg, idx_t end); - inline void par_put_range_within_word (idx_t beg, idx_t end, bool value); + bm_word_t inverted_bit_mask_for_range(idx_t beg, idx_t end) const; + void set_range_within_word (idx_t beg, idx_t end); + void clear_range_within_word (idx_t beg, idx_t end); + void par_put_range_within_word (idx_t beg, idx_t end, bool value); // Ranges spanning entire words. - inline void set_range_of_words (idx_t beg, idx_t end); - inline void clear_range_of_words (idx_t beg, idx_t end); - inline void set_large_range_of_words (idx_t beg, idx_t end); - inline void clear_large_range_of_words (idx_t beg, idx_t end); + void set_range_of_words (idx_t beg, idx_t end); + void clear_range_of_words (idx_t beg, idx_t end); + void set_large_range_of_words (idx_t beg, idx_t end); + void clear_large_range_of_words (idx_t beg, idx_t end); // The index of the first full word in a range. - inline idx_t word_index_round_up(idx_t bit) const; + idx_t word_index_round_up(idx_t bit) const; // Verification, statistics. - void verify_index(idx_t index) const { - assert(index < _size, "BitMap index out of bounds"); - } + void verify_index(idx_t index) const; + void verify_range(idx_t beg_index, idx_t end_index) const; - void verify_range(idx_t beg_index, idx_t end_index) const { -#ifdef ASSERT - assert(beg_index <= end_index, "BitMap range error"); - // Note that [0,0) and [size,size) are both valid ranges. - if (end_index != _size) verify_index(end_index); -#endif - } + static idx_t* _pop_count_table; + static void init_pop_count_table(); + static idx_t num_set_bits(bm_word_t w); + static idx_t num_set_bits_from_table(unsigned char c); public: // Constructs a bitmap with no map, and size 0. BitMap() : _map(NULL), _size(0) {} - // Construction - BitMap(idx_t* map, idx_t size_in_bits); + // Constructs a bitmap with the given map and size. + BitMap(bm_word_t* map, idx_t size_in_bits); - // Allocates necessary data structure in resource area - BitMap(idx_t size_in_bits); + // Constructs an empty bitmap of the given size (that is, this clears the + // new bitmap). Allocates the map array in resource area if + // "in_resource_area" is true, else in the C heap. + BitMap(idx_t size_in_bits, bool in_resource_area = true); - void set_map(idx_t* map) { _map = map; } + // Set the map and size. + void set_map(bm_word_t* map) { _map = map; } void set_size(idx_t size_in_bits) { _size = size_in_bits; } - // Allocates necessary data structure in resource area. 
+ // Allocates necessary data structure, either in the resource area + // or in the C heap, as indicated by "in_resource_area." // Preserves state currently in bit map by copying data. // Zeros any newly-addressable bits. - // Does not perform any frees (i.e., of current _map). - void resize(idx_t size_in_bits); + // If "in_resource_area" is false, frees the current map. + // (Note that this assumes that all calls to "resize" on the same BitMap + // use the same value for "in_resource_area".) + void resize(idx_t size_in_bits, bool in_resource_area = true); // Accessing idx_t size() const { return _size; } @@ -157,11 +153,11 @@ // Set or clear the specified bit. inline void set_bit(idx_t bit); - inline void clear_bit(idx_t bit); + void clear_bit(idx_t bit); // Atomically set or clear the specified bit. - inline bool par_set_bit(idx_t bit); - inline bool par_clear_bit(idx_t bit); + bool par_set_bit(idx_t bit); + bool par_clear_bit(idx_t bit); // Put the given value at the given offset. The parallel version // will CAS the value into the bitmap and is quite a bit slower. @@ -183,23 +179,61 @@ // Update a range of bits, using a hint about the size. Currently only // inlines the predominant case of a 1-bit range. Works best when hint is a // compile-time constant. - inline void set_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void clear_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint); - inline void par_clear_range (idx_t beg, idx_t end, RangeSizeHint hint); + void set_range(idx_t beg, idx_t end, RangeSizeHint hint); + void clear_range(idx_t beg, idx_t end, RangeSizeHint hint); + void par_set_range(idx_t beg, idx_t end, RangeSizeHint hint); + void par_clear_range (idx_t beg, idx_t end, RangeSizeHint hint); + + // It performs the union operation between subsets of equal length + // of two bitmaps (the target bitmap of the method and the + // from_bitmap) and stores the result to the target bitmap. The + // from_start_index represents the first bit index of the subrange + // of the from_bitmap. The to_start_index is the equivalent of the + // target bitmap. Both indexes should be word-aligned, i.e. they + // should correspond to the first bit on a bitmap word (it's up to + // the caller to ensure this; the method does check it). The length + // of the subset is specified with word_num and it is in number of + // bitmap words. The caller should ensure that this is at least 2 + // (smaller ranges are not support to save extra checks). Again, + // this is checked in the method. + // + // Atomicity concerns: it is assumed that any contention on the + // target bitmap with other threads will happen on the first and + // last words; the ones in between will be "owned" exclusively by + // the calling thread and, in fact, they will already be 0. So, the + // method performs a CAS on the first word, copies the next + // word_num-2 words, and finally performs a CAS on the last word. + void mostly_disjoint_range_union(BitMap* from_bitmap, + idx_t from_start_index, + idx_t to_start_index, + size_t word_num); + // Clearing - void clear(); void clear_large(); + inline void clear(); - // Iteration support - void iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex); - inline void iterate(BitMapClosure* blk) { + // Iteration support. Returns "true" if the iteration completed, false + // if the iteration terminated early (because the closure "blk" returned + // false). 
+ bool iterate(BitMapClosure* blk, idx_t leftIndex, idx_t rightIndex); + bool iterate(BitMapClosure* blk) { // call the version that takes an interval - iterate(blk, 0, size()); + return iterate(blk, 0, size()); } - // Looking for 1's and 0's to the "right" + // Looking for 1's and 0's at indices equal to or greater than "l_index", + // stopping if none has been found before "r_index", and returning + // "r_index" (which must be at most "size") in that case. + idx_t get_next_one_offset_inline (idx_t l_index, idx_t r_index) const; + idx_t get_next_zero_offset_inline(idx_t l_index, idx_t r_index) const; + + // Like "get_next_one_offset_inline", except requires that "r_index" is + // aligned to bitsizeof(bm_word_t). + idx_t get_next_one_offset_inline_aligned_right(idx_t l_index, + idx_t r_index) const; + + // Non-inline versionsof the above. idx_t get_next_one_offset (idx_t l_index, idx_t r_index) const; idx_t get_next_zero_offset(idx_t l_index, idx_t r_index) const; @@ -210,12 +244,8 @@ return get_next_zero_offset(offset, size()); } - - - // Find the next one bit in the range [beg_bit, end_bit), or return end_bit if - // no one bit is found. Equivalent to get_next_one_offset(), but inline for - // use in performance-critical code. - inline idx_t find_next_one_bit(idx_t beg_bit, idx_t end_bit) const; + // Returns the number of bits set in the bitmap. + idx_t count_one_bits() const; // Set operations. void set_union(BitMap bits); @@ -232,6 +262,15 @@ bool set_difference_with_result(BitMap bits); bool set_intersection_with_result(BitMap bits); + // Requires the submap of "bits" starting at offset to be at least as + // large as "this". Modifies "this" to be the intersection of its + // current contents and the submap of "bits" starting at "offset" of the + // same length as "this." + // (For expedience, currently requires the offset to be aligned to the + // bitsize of a uintptr_t. This should go away in the future though it + // will probably remain a good case to optimize.) + void set_intersection_at_offset(BitMap bits, idx_t offset); + void set_from(BitMap bits); bool is_same(BitMap bits); @@ -248,58 +287,13 @@ #endif }; -inline void BitMap::set_bit(idx_t bit) { - verify_index(bit); - *word_addr(bit) |= bit_mask(bit); -} - -inline void BitMap::clear_bit(idx_t bit) { - verify_index(bit); - *word_addr(bit) &= ~bit_mask(bit); -} - -inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - set_bit(beg); - } else { - if (hint == large_range) { - set_large_range(beg, end); - } else { - set_range(beg, end); - } - } -} - -inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - clear_bit(beg); - } else { - if (hint == large_range) { - clear_large_range(beg, end); - } else { - clear_range(beg, end); - } - } -} - -inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - par_at_put(beg, true); - } else { - if (hint == large_range) { - par_at_put_large_range(beg, end, true); - } else { - par_at_put_range(beg, end, true); - } - } -} - // Convenience class wrapping BitMap which provides multiple bits per slot. class BitMap2D VALUE_OBJ_CLASS_SPEC { public: - typedef size_t idx_t; // Type used for bit and word indices. - + typedef BitMap::idx_t idx_t; // Type used for bit and word indices. + typedef BitMap::bm_word_t bm_word_t; // Element type of array that + // represents the bitmap. 
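// Illustrative sketch (not code from this changeset): count_one_bits(),
// declared above and implemented in bitMap.cpp, counts set bits with a lazily
// built 256-entry per-byte population-count table. The self-contained stand-in
// below shows the same scheme in plain C++ with no HotSpot types; the real
// code allocates the table in the C heap and publishes it with a CAS so that
// racing initializers are benign.

#include <stddef.h>
#include <stdint.h>

static unsigned char demo_pop_count_table[256];

static void demo_init_pop_count_table() {
  for (unsigned i = 0; i < 256; i++) {
    unsigned v = i, bits = 0;
    while (v != 0) { bits += (v & 1); v >>= 1; }   // count bits of one byte
    demo_pop_count_table[i] = (unsigned char) bits;
  }
}

// Sum per-byte table lookups over every word of the bitmap.
static size_t demo_count_one_bits(const uintptr_t* map, size_t size_in_words) {
  size_t sum = 0;
  for (size_t i = 0; i < size_in_words; i++) {
    uintptr_t w = map[i];
    for (size_t j = 0; j < sizeof(uintptr_t); j++) {
      sum += demo_pop_count_table[(unsigned char)(w & 0xff)];
      w >>= 8;
    }
  }
  return sum;
}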
private: BitMap _map; idx_t _bits_per_slot; @@ -314,7 +308,7 @@ public: // Construction. bits_per_slot must be greater than 0. - BitMap2D(uintptr_t* map, idx_t size_in_slots, idx_t bits_per_slot); + BitMap2D(bm_word_t* map, idx_t size_in_slots, idx_t bits_per_slot); // Allocates necessary data structure in resource area. bits_per_slot must be greater than 0. BitMap2D(idx_t size_in_slots, idx_t bits_per_slot); @@ -359,38 +353,14 @@ _map.at_put_grow(bit_index(slot_index, bit_within_slot_index), value); } - void clear() { - _map.clear(); - } + void clear(); }; - - -inline void BitMap::set_range_of_words(idx_t beg, idx_t end) { - uintptr_t* map = _map; - for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0; -} - - -inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) { - uintptr_t* map = _map; - for (idx_t i = beg; i < end; ++i) map[i] = 0; -} - +// Closure for iterating over BitMaps -inline void BitMap::clear() { - clear_range_of_words(0, size_in_words()); -} - - -inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { - if (hint == small_range && end - beg == 1) { - par_at_put(beg, false); - } else { - if (hint == large_range) { - par_at_put_large_range(beg, end, false); - } else { - par_at_put_range(beg, end, false); - } - } -} +class BitMapClosure VALUE_OBJ_CLASS_SPEC { + public: + // Callback when bit in map is set. Should normally return "true"; + // return of false indicates that the bitmap iteration should terminate. + virtual bool do_bit(BitMap::idx_t offset) = 0; +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/bitMap.inline.hpp --- a/src/share/vm/utilities/bitMap.inline.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/bitMap.inline.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -22,6 +22,17 @@ * */ + +inline void BitMap::set_bit(idx_t bit) { + verify_index(bit); + *word_addr(bit) |= bit_mask(bit); +} + +inline void BitMap::clear_bit(idx_t bit) { + verify_index(bit); + *word_addr(bit) &= ~bit_mask(bit); +} + inline bool BitMap::par_set_bit(idx_t bit) { verify_index(bit); volatile idx_t* const addr = word_addr(bit); @@ -64,42 +75,236 @@ } while (true); } -inline BitMap::idx_t -BitMap::find_next_one_bit(idx_t beg_bit, idx_t end_bit) const -{ - verify_range(beg_bit, end_bit); - assert(bit_in_word(end_bit) == 0, "end_bit not word-aligned"); +inline void BitMap::set_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + set_bit(beg); + } else { + if (hint == large_range) { + set_large_range(beg, end); + } else { + set_range(beg, end); + } + } +} + +inline void BitMap::clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + clear_bit(beg); + } else { + if (hint == large_range) { + clear_large_range(beg, end); + } else { + clear_range(beg, end); + } + } +} + +inline void BitMap::par_set_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + par_at_put(beg, true); + } else { + if (hint == large_range) { + par_at_put_large_range(beg, end, true); + } else { + par_at_put_range(beg, end, true); + } + } +} - if (beg_bit == end_bit) { - return beg_bit; - } +inline void BitMap::set_range_of_words(idx_t beg, idx_t end) { + bm_word_t* map = _map; + for (idx_t i = beg; i < end; ++i) map[i] = ~(uintptr_t)0; +} + + +inline void BitMap::clear_range_of_words(idx_t beg, idx_t end) { + bm_word_t* map = _map; + for (idx_t i = beg; i < end; ++i) map[i] = 0; +} + + +inline void BitMap::clear() { + 
clear_range_of_words(0, size_in_words()); +} + - idx_t index = word_index(beg_bit); - idx_t r_index = word_index(end_bit); - idx_t res_bit = beg_bit; +inline void BitMap::par_clear_range(idx_t beg, idx_t end, RangeSizeHint hint) { + if (hint == small_range && end - beg == 1) { + par_at_put(beg, false); + } else { + if (hint == large_range) { + par_at_put_large_range(beg, end, false); + } else { + par_at_put_range(beg, end, false); + } + } +} + +inline BitMap::idx_t +BitMap::get_next_one_offset_inline(idx_t l_offset, idx_t r_offset) const { + assert(l_offset <= size(), "BitMap index out of bounds"); + assert(r_offset <= size(), "BitMap index out of bounds"); + assert(l_offset <= r_offset, "l_offset > r_offset ?"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset-1) + 1; + idx_t res_offset = l_offset; // check bits including and to the _left_ of offset's position - idx_t res = map(index) >> bit_in_word(res_bit); - if (res != (uintptr_t) NoBits) { + idx_t pos = bit_in_word(res_offset); + idx_t res = map(index) >> pos; + if (res != (uintptr_t)NoBits) { // find the position of the 1-bit - for (; !(res & 1); res_bit++) { + for (; !(res & 1); res_offset++) { res = res >> 1; } - assert(res_bit >= beg_bit && res_bit < end_bit, "just checking"); - return res_bit; + assert(res_offset >= l_offset && + res_offset < r_offset, "just checking"); + return MIN2(res_offset, r_offset); } // skip over all word length 0-bit runs for (index++; index < r_index; index++) { res = map(index); - if (res != (uintptr_t) NoBits) { + if (res != (uintptr_t)NoBits) { // found a 1, return the offset - for (res_bit = bit_index(index); !(res & 1); res_bit++) { + for (res_offset = bit_index(index); !(res & 1); res_offset++) { res = res >> 1; } assert(res & 1, "tautology; see loop condition"); - assert(res_bit >= beg_bit && res_bit < end_bit, "just checking"); - return res_bit; + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); + } + } + return r_offset; +} + +inline BitMap::idx_t +BitMap::get_next_zero_offset_inline(idx_t l_offset, idx_t r_offset) const { + assert(l_offset <= size(), "BitMap index out of bounds"); + assert(r_offset <= size(), "BitMap index out of bounds"); + assert(l_offset <= r_offset, "l_offset > r_offset ?"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset-1) + 1; + idx_t res_offset = l_offset; + + // check bits including and to the _left_ of offset's position + idx_t pos = res_offset & (BitsPerWord - 1); + idx_t res = (map(index) >> pos) | left_n_bits((int)pos); + + if (res != (uintptr_t)AllBits) { + // find the position of the 0-bit + for (; res & 1; res_offset++) { + res = res >> 1; + } + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); + } + // skip over all word length 1-bit runs + for (index++; index < r_index; index++) { + res = map(index); + if (res != (uintptr_t)AllBits) { + // found a 0, return the offset + for (res_offset = index << LogBitsPerWord; res & 1; + res_offset++) { + res = res >> 1; + } + assert(!(res & 1), "tautology; see loop condition"); + assert(res_offset >= l_offset, "just checking"); + return MIN2(res_offset, r_offset); } } - return end_bit; + return r_offset; +} + +inline BitMap::idx_t +BitMap::get_next_one_offset_inline_aligned_right(idx_t l_offset, + idx_t r_offset) const +{ + verify_range(l_offset, r_offset); + assert(bit_in_word(r_offset) 
== 0, "r_offset not word-aligned"); + + if (l_offset == r_offset) { + return l_offset; + } + idx_t index = word_index(l_offset); + idx_t r_index = word_index(r_offset); + idx_t res_offset = l_offset; + + // check bits including and to the _left_ of offset's position + idx_t res = map(index) >> bit_in_word(res_offset); + if (res != (uintptr_t)NoBits) { + // find the position of the 1-bit + for (; !(res & 1); res_offset++) { + res = res >> 1; + } + assert(res_offset >= l_offset && + res_offset < r_offset, "just checking"); + return res_offset; + } + // skip over all word length 0-bit runs + for (index++; index < r_index; index++) { + res = map(index); + if (res != (uintptr_t)NoBits) { + // found a 1, return the offset + for (res_offset = bit_index(index); !(res & 1); res_offset++) { + res = res >> 1; + } + assert(res & 1, "tautology; see loop condition"); + assert(res_offset >= l_offset && res_offset < r_offset, "just checking"); + return res_offset; + } + } + return r_offset; } + + +// Returns a bit mask for a range of bits [beg, end) within a single word. Each +// bit in the mask is 0 if the bit is in the range, 1 if not in the range. The +// returned mask can be used directly to clear the range, or inverted to set the +// range. Note: end must not be 0. +inline BitMap::bm_word_t +BitMap::inverted_bit_mask_for_range(idx_t beg, idx_t end) const { + assert(end != 0, "does not work when end == 0"); + assert(beg == end || word_index(beg) == word_index(end - 1), + "must be a single-word range"); + bm_word_t mask = bit_mask(beg) - 1; // low (right) bits + if (bit_in_word(end) != 0) { + mask |= ~(bit_mask(end) - 1); // high (left) bits + } + return mask; +} + +inline void BitMap::set_large_range_of_words(idx_t beg, idx_t end) { + memset(_map + beg, ~(unsigned char)0, (end - beg) * sizeof(uintptr_t)); +} + +inline void BitMap::clear_large_range_of_words(idx_t beg, idx_t end) { + memset(_map + beg, 0, (end - beg) * sizeof(uintptr_t)); +} + +inline BitMap::idx_t BitMap::word_index_round_up(idx_t bit) const { + idx_t bit_rounded_up = bit + (BitsPerWord - 1); + // Check for integer arithmetic overflow. + return bit_rounded_up > bit ? 
word_index(bit_rounded_up) : size_in_words(); +} + +inline BitMap::idx_t BitMap::get_next_one_offset(idx_t l_offset, + idx_t r_offset) const { + return get_next_one_offset_inline(l_offset, r_offset); +} + +inline BitMap::idx_t BitMap::get_next_zero_offset(idx_t l_offset, + idx_t r_offset) const { + return get_next_zero_offset_inline(l_offset, r_offset); +} + +inline void BitMap2D::clear() { + _map.clear(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/debug.cpp --- a/src/share/vm/utilities/debug.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/debug.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -666,7 +666,7 @@ oop target; void do_oop(oop* o) { if (o != NULL && *o == target) { - tty->print_cr("0x%08x", o); + tty->print_cr(INTPTR_FORMAT, o); } } void do_oop(narrowOop* o) { ShouldNotReachHere(); } @@ -685,13 +685,13 @@ static void findref(intptr_t x) { - GenCollectedHeap *gch = GenCollectedHeap::heap(); + CollectedHeap *ch = Universe::heap(); LookForRefInGenClosure lookFor; lookFor.target = (oop) x; LookForRefInObjectClosure look_in_object((oop) x); tty->print_cr("Searching heap:"); - gch->object_iterate(&look_in_object); + ch->object_iterate(&look_in_object); tty->print_cr("Searching strong roots:"); Universe::oops_do(&lookFor, false); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/intHisto.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/intHisto.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,64 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. 
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_intHisto.cpp.incl" + +IntHistogram::IntHistogram(int est, int max) : _max(max), _tot(0) { + assert(0 <= est && est <= max, "Preconditions"); + _elements = new (ResourceObj::C_HEAP) GrowableArray<int>(est, true); + guarantee(_elements != NULL, "alloc failure"); +} + +void IntHistogram::add_entry(int outcome) { + if (outcome > _max) outcome = _max; + int new_count = _elements->at_grow(outcome) + 1; + _elements->at_put(outcome, new_count); + _tot++; +} + +int IntHistogram::entries_for_outcome(int outcome) { + return _elements->at_grow(outcome); +} + +void IntHistogram::print_on(outputStream* st) const { + double tot_d = (double)_tot; + st->print_cr("Outcome # of occurrences %% of occurrences"); + st->print_cr("-----------------------------------------------"); + for (int i=0; i < _elements->length()-2; i++) { + int cnt = _elements->at(i); + if (cnt != 0) { + st->print_cr("%7d %10d %8.4f", + i, cnt, (double)cnt/tot_d); + } + } + // Does it have any max entries? + if (_elements->length()-1 == _max) { + int cnt = _elements->at(_max); + st->print_cr(">= %4d %10d %8.4f", + _max, cnt, (double)cnt/tot_d); + } + st->print_cr("-----------------------------------------------"); + st->print_cr(" All %10d %8.4f", _tot, 1.0); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/intHisto.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/intHisto.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,70 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +// This class implements a simple histogram. + +// A histogram summarizes a series of "measurements", each of which is +// assumed (required in this implementation) to have an outcome that is a +// non-negative integer. The histogram efficiently maps measurement outcomes +// to the number of measurements that had that outcome. + +// To print the results, invoke print_on() on your IntHistogram*. + +// Note: there is already an existing "Histogram" class, in file +// histogram.{hpp,cpp}, but to my mind that's not a histogram, it's a table +// mapping strings to counts. To be a histogram (IMHO) it needs to map +// numbers (in fact, integers) to number of occurrences of that number. + +// ysr: (i am not sure i agree with the above note.) i suspect we want to have a +// histogram template that will map an arbitrary type (with a defined order +// relation) to a count.
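A hypothetical usage sketch (the reporting function and its inputs are illustrative, not part of this changeset): summarizing per-attempt retry counts, with every outcome of 16 or more bundled into the final bucket.

static void report_retries(outputStream* st, const int* retries, int n) {
  IntHistogram histo(8 /* est */, 16 /* max */);
  for (int i = 0; i < n; i++) {
    histo.add_entry(retries[i]);   // outcomes above max are folded into max
  }
  st->print_cr("%d attempts in total", histo.total_entries());
  histo.print_on(st);
}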
+ + +class IntHistogram : public CHeapObj { + protected: + int _max; + int _tot; + GrowableArray<int>* _elements; + +public: + // Create a new, empty table. "est" is an estimate of the maximum outcome + // that will be added, and "max" is an outcome such that all outcomes at + // least that large will be bundled with it. + IntHistogram(int est, int max); + // Add a measurement with the given outcome to the sequence. + void add_entry(int outcome); + // Return the number of entries recorded so far with the given outcome. + int entries_for_outcome(int outcome); + // Return the total number of entries recorded so far. + int total_entries() { return _tot; } + // Return the number of entries recorded so far with the given outcome as + // a fraction of the total number recorded so far. + double fraction_for_outcome(int outcome) { + return + (double)entries_for_outcome(outcome)/ + (double)total_entries(); + } + // Print the histogram on the given output stream. + void print_on(outputStream* st) const; +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/numberSeq.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/numberSeq.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,243 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions.
+ * + */ + +# include "incls/_precompiled.incl" +# include "incls/_numberSeq.cpp.incl" + +AbsSeq::AbsSeq(double alpha) : + _num(0), _sum(0.0), _sum_of_squares(0.0), + _davg(0.0), _dvariance(0.0), _alpha(alpha) { +} + +void AbsSeq::add(double val) { + if (_num == 0) { + // if the sequence is empty, the davg is the same as the value + _davg = val; + // and the variance is 0 + _dvariance = 0.0; + } else { + // otherwise, calculate both + _davg = (1.0 - _alpha) * val + _alpha * _davg; + double diff = val - _davg; + _dvariance = (1.0 - _alpha) * diff * diff + _alpha * _dvariance; + } +} + +double AbsSeq::avg() const { + if (_num == 0) + return 0.0; + else + return _sum / total(); +} + +double AbsSeq::variance() const { + if (_num <= 1) + return 0.0; + + double x_bar = avg(); + double result = _sum_of_squares / total() - x_bar * x_bar; + if (result < 0.0) { + // due to loss-of-precision errors, the variance might be negative + // by a small bit + + // guarantee(-0.1 < result && result < 0.0, + // "if variance is negative, it should be very small"); + result = 0.0; + } + return result; +} + +double AbsSeq::sd() const { + double var = variance(); + guarantee( var >= 0.0, "variance should not be negative" ); + return sqrt(var); +} + +double AbsSeq::davg() const { + return _davg; +} + +double AbsSeq::dvariance() const { + if (_num <= 1) + return 0.0; + + double result = _dvariance; + if (result < 0.0) { + // due to loss-of-precision errors, the variance might be negative + // by a small bit + + guarantee(-0.1 < result && result < 0.0, + "if variance is negative, it should be very small"); + result = 0.0; + } + return result; +} + +double AbsSeq::dsd() const { + double var = dvariance(); + guarantee( var >= 0.0, "variance should not be negative" ); + return sqrt(var); +} + +NumberSeq::NumberSeq(double alpha) : + AbsSeq(alpha), _maximum(0.0), _last(0.0) { +} + +bool NumberSeq::check_nums(NumberSeq *total, int n, NumberSeq **parts) { + for (int i = 0; i < n; ++i) { + if (parts[i] != NULL && total->num() != parts[i]->num()) + return false; + } + return true; +} + +NumberSeq::NumberSeq(NumberSeq *total, int n, NumberSeq **parts) { + guarantee(check_nums(total, n, parts), "all seq lengths should match"); + double sum = total->sum(); + for (int i = 0; i < n; ++i) { + if (parts[i] != NULL) + sum -= parts[i]->sum(); + } + + _num = total->num(); + _sum = sum; + + // we do not calculate these... + _sum_of_squares = -1.0; + _maximum = -1.0; + _davg = -1.0; + _dvariance = -1.0; +} + +void NumberSeq::add(double val) { + AbsSeq::add(val); + + _last = val; + if (_num == 0) { + _maximum = val; + } else { + if (val > _maximum) + _maximum = val; + } + _sum += val; + _sum_of_squares += val * val; + ++_num; +} + + +TruncatedSeq::TruncatedSeq(int length, double alpha): + AbsSeq(alpha), _length(length), _next(0) { + _sequence = NEW_C_HEAP_ARRAY(double, _length); + for (int i = 0; i < _length; ++i) + _sequence[i] = 0.0; +} + +void TruncatedSeq::add(double val) { + AbsSeq::add(val); + + // get the oldest value in the sequence... 
+ double old_val = _sequence[_next]; + // ...remove it from the sum and sum of squares + _sum -= old_val; + _sum_of_squares -= old_val * old_val; + + // ...and update them with the new value + _sum += val; + _sum_of_squares += val * val; + + // now replace the old value with the new one + _sequence[_next] = val; + _next = (_next + 1) % _length; + + // only increase it if the buffer is not full + if (_num < _length) + ++_num; + + guarantee( variance() > -1.0, "variance should be >= 0" ); +} + +// can't easily keep track of this incrementally... +double TruncatedSeq::maximum() const { + if (_num == 0) + return 0.0; + double ret = _sequence[0]; + for (int i = 1; i < _num; ++i) { + double val = _sequence[i]; + if (val > ret) + ret = val; + } + return ret; +} + +double TruncatedSeq::last() const { + if (_num == 0) + return 0.0; + unsigned last_index = (_next + _length - 1) % _length; + return _sequence[last_index]; +} + +double TruncatedSeq::oldest() const { + if (_num == 0) + return 0.0; + else if (_num < _length) + // index 0 always oldest value until the array is full + return _sequence[0]; + else { + // since the array is full, _next is over the oldest value + return _sequence[_next]; + } +} + +double TruncatedSeq::predict_next() const { + if (_num == 0) + return 0.0; + + double num = (double) _num; + double x_squared_sum = 0.0; + double x_sum = 0.0; + double y_sum = 0.0; + double xy_sum = 0.0; + double x_avg = 0.0; + double y_avg = 0.0; + + int first = (_next + _length - _num) % _length; + for (int i = 0; i < _num; ++i) { + double x = (double) i; + double y = _sequence[(first + i) % _length]; + + x_squared_sum += x * x; + x_sum += x; + y_sum += y; + xy_sum += x * y; + } + x_avg = x_sum / num; + y_avg = y_sum / num; + + double Sxx = x_squared_sum - x_sum * x_sum / num; + double Sxy = xy_sum - x_sum * y_sum / num; + double b1 = Sxy / Sxx; + double b0 = y_avg - b1 * x_avg; + + return b0 + b1 * num; +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/numberSeq.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/utilities/numberSeq.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -0,0 +1,117 @@ +/* + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/** + ** This file contains a few classes that represent number sequence, + ** x1, x2, x3, ..., xN, and can calculate their avg, max, and sd. 
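A minimal usage sketch (the caller is hypothetical, not part of this changeset): feeding the latest pause time into a TruncatedSeq and using the linear-regression predictor implemented above to estimate the next one.

static double record_and_predict(TruncatedSeq* pauses, double latest_ms) {
  pauses->add(latest_ms);          // also refreshes the sums and decaying stats
  return pauses->predict_next();   // least-squares fit over the last L samples
}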
+ ** + ** Here's a quick description of the classes: + ** + ** AbsSeq: abstract superclass + ** NumberSeq: the sequence is assumed to be very long and the + ** maximum, avg, sd, davg, and dsd are calculated over all its elements + ** TruncatedSeq: this class keeps track of the last L elements + ** of the sequence and calculates avg, max, and sd only over them + **/ + +#define DEFAULT_ALPHA_VALUE 0.7 + +class AbsSeq { +private: + void init(double alpha); + +protected: + int _num; // the number of elements in the sequence + double _sum; // the sum of the elements in the sequence + double _sum_of_squares; // the sum of squares of the elements in the sequence + + double _davg; // decaying average + double _dvariance; // decaying variance + double _alpha; // factor for the decaying average / variance + + // This is what we divide with to get the average. In a standard + // number sequence, this should just be the number of elements in it. + virtual double total() const { return (double) _num; }; + +public: + AbsSeq(double alpha = DEFAULT_ALPHA_VALUE); + + virtual void add(double val); // adds a new element to the sequence + void add(unsigned val) { add((double) val); } + virtual double maximum() const = 0; // maximum element in the sequence + virtual double last() const = 0; // last element added in the sequence + + // the number of elements in the sequence + int num() const { return _num; } + // the sum of the elements in the sequence + double sum() const { return _sum; } + + double avg() const; // the average of the sequence + double variance() const; // the variance of the sequence + double sd() const; // the standard deviation of the sequence + + double davg() const; // decaying average + double dvariance() const; // decaying variance + double dsd() const; // decaying "standard deviation" +}; + +class NumberSeq: public AbsSeq { +private: + bool check_nums(NumberSeq* total, int n, NumberSeq** parts); + +protected: + double _last; + double _maximum; // keep track of maximum value + +public: + NumberSeq(double alpha = DEFAULT_ALPHA_VALUE); + NumberSeq(NumberSeq* total, int n_parts, NumberSeq** parts); + + virtual void add(double val); + virtual double maximum() const { return _maximum; } + virtual double last() const { return _last; } +}; + +class TruncatedSeq: public AbsSeq { +private: + enum PrivateConstants { + DefaultSeqLength = 10 + }; + void init(); +protected: + double *_sequence; // buffers the last L elements in the sequence + int _length; // this is L + int _next; // oldest slot in the array, i.e. 
next to be overwritten + +public: + // accepts a value for L + TruncatedSeq(int length = DefaultSeqLength, + double alpha = DEFAULT_ALPHA_VALUE); + virtual void add(double val); + virtual double maximum() const; + virtual double last() const; // the last value added to the sequence + + double oldest() const; // the oldest valid value in the sequence + double predict_next() const; // prediction based on linear regression +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/ostream.cpp --- a/src/share/vm/utilities/ostream.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/ostream.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -188,6 +188,17 @@ print_raw(buf); } +void outputStream::stamp(bool guard, + const char* prefix, + const char* suffix) { + if (!guard) { + return; + } + print_raw(prefix); + stamp(); + print_raw(suffix); +} + void outputStream::date_stamp(bool guard, const char* prefix, const char* suffix) { diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/ostream.hpp --- a/src/share/vm/utilities/ostream.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/ostream.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -86,6 +86,10 @@ // Time stamp TimeStamp& time_stamp() { return _stamp; } void stamp(); + void stamp(bool guard, const char* prefix, const char* suffix); + void stamp(bool guard) { + stamp(guard, "", ": "); + } // Date stamp void date_stamp(bool guard, const char* prefix, const char* suffix); // A simplified call that includes a suffix of ": " diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/taskqueue.cpp --- a/src/share/vm/utilities/taskqueue.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/taskqueue.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -65,7 +65,8 @@ os::sleep(Thread::current(), millis, false); } -bool ParallelTaskTerminator::offer_termination() { +bool +ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { Atomic::inc(&_offered_termination); juint yield_count = 0; @@ -91,7 +92,8 @@ sleep(WorkStealingSleepMillis); } - if (peek_in_queue_set()) { + if (peek_in_queue_set() || + (terminator != NULL && terminator->should_exit_termination())) { Atomic::dec(&_offered_termination); return false; } diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/taskqueue.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -120,6 +120,11 @@ return dirty_size(_bottom, get_top()); } + void set_empty() { + _bottom = 0; + _age = Age(); + } + // Maximum number of elements allowed in the queue. This is two less // than the actual queue size, for somewhat complicated reasons. juint max_elems() { return n() - 2; } @@ -155,6 +160,9 @@ // Delete any resource associated with the queue. ~GenericTaskQueue(); + // apply the closure to all elements in the task queue + void oops_do(OopClosure* f); + private: // Element array. 
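A minimal sketch of the new guarded stamp() (the logging helper is assumed, not part of this changeset): the one-argument form prints the time stamp followed by ": " only when the guard is true, so callers do not need their own if-check.

static void log_phase(outputStream* st, bool print_time_stamps,
                      const char* phase, double ms) {
  st->stamp(print_time_stamps);            // emits "<seconds>: " if requested
  st->print_cr("%s took %1.3lf ms", phase, ms);
}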
volatile E* _elems; @@ -172,6 +180,24 @@ } template<class E> +void GenericTaskQueue<E>::oops_do(OopClosure* f) { + // tty->print_cr("START OopTaskQueue::oops_do"); + int iters = size(); + juint index = _bottom; + for (int i = 0; i < iters; ++i) { + index = decrement_index(index); + // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, + // index, &_elems[index], _elems[index]); + E* t = (E*)&_elems[index]; // cast away volatility + oop* p = (oop*)t; + assert((*t)->is_oop_or_null(), "Not an oop or null"); + f->do_oop(p); + } + // tty->print_cr("END OopTaskQueue::oops_do"); +} + + +template<class E> +bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) { if (dirty_n_elems == n() - 1) { // Actually means 0, so do the push. @@ -383,6 +409,12 @@ return false; } +// When to terminate from the termination protocol. +class TerminatorTerminator: public CHeapObj { +public: + virtual bool should_exit_termination() = 0; +}; + // A class to aid in the termination of a set of parallel tasks using // TaskQueueSet's for work stealing. @@ -407,7 +439,14 @@ // else is. If returns "true", all threads are terminated. If returns // "false", available work has been observed in one of the task queues, // so the global task is not complete. - bool offer_termination(); + bool offer_termination() { + return offer_termination(NULL); + } + + // As above, but it also terminates if the should_exit_termination() + // method of the terminator parameter returns true. If terminator is + // NULL, then it is ignored. + bool offer_termination(TerminatorTerminator* terminator); // Reset the terminator, so that it may be reused again. // The caller is responsible for ensuring that this is done diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/workgroup.cpp --- a/src/share/vm/utilities/workgroup.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/workgroup.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -28,13 +28,19 @@ // Definitions of WorkGang methods. AbstractWorkGang::AbstractWorkGang(const char* name, - bool are_GC_threads) : + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : _name(name), - _are_GC_threads(are_GC_threads) { + _are_GC_task_threads(are_GC_task_threads), + _are_ConcurrentGC_threads(are_ConcurrentGC_threads) { + + assert(!(are_GC_task_threads && are_ConcurrentGC_threads), + "They cannot both be STW GC and Concurrent threads" ); + + // Other initialization. _monitor = new Monitor(/* priority */ Mutex::leaf, /* name */ "WorkGroup monitor", - /* allow_vm_block */ are_GC_threads); + /* allow_vm_block */ are_GC_task_threads); assert(monitor() != NULL, "Failed to allocate monitor"); _terminate = false; _task = NULL; @@ -44,16 +50,21 @@ } WorkGang::WorkGang(const char* name, - int workers, - bool are_GC_threads) : - AbstractWorkGang(name, are_GC_threads) { + int workers, + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : + AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) +{ // Save arguments.
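A hypothetical TerminatorTerminator (not part of this changeset) showing how a caller can make the termination protocol give up when an external abort flag is raised.

class AbortFlagTerminator : public TerminatorTerminator {
  volatile bool* _abort_requested;   // assumed external flag
public:
  AbortFlagTerminator(volatile bool* flag) : _abort_requested(flag) { }
  virtual bool should_exit_termination() { return *_abort_requested; }
};

A worker would then call offer_termination(&aft) instead of the no-argument form; note that a false return no longer guarantees that work was observed, since the terminator itself may have requested the exit.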
_total_workers = workers; + if (TraceWorkGang) { tty->print_cr("Constructing work gang %s with %d threads", name, workers); } _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers); - assert(gang_workers() != NULL, "Failed to allocate gang workers"); + if (gang_workers() == NULL) { + vm_exit_out_of_memory(0, "Cannot create GangWorker array."); + } for (int worker = 0; worker < total_workers(); worker += 1) { GangWorker* new_worker = new GangWorker(this, worker); assert(new_worker != NULL, "Failed to allocate GangWorker"); @@ -285,7 +296,11 @@ } bool GangWorker::is_GC_task_thread() const { - return gang()->are_GC_threads(); + return gang()->are_GC_task_threads(); +} + +bool GangWorker::is_ConcurrentGC_thread() const { + return gang()->are_ConcurrentGC_threads(); } void GangWorker::print_on(outputStream* st) const { @@ -312,26 +327,43 @@ WorkGangBarrierSync::WorkGangBarrierSync() : _monitor(Mutex::safepoint, "work gang barrier sync", true), - _n_workers(0), _n_completed(0) { + _n_workers(0), _n_completed(0), _should_reset(false) { } WorkGangBarrierSync::WorkGangBarrierSync(int n_workers, const char* name) : _monitor(Mutex::safepoint, name, true), - _n_workers(n_workers), _n_completed(0) { + _n_workers(n_workers), _n_completed(0), _should_reset(false) { } void WorkGangBarrierSync::set_n_workers(int n_workers) { _n_workers = n_workers; _n_completed = 0; + _should_reset = false; } void WorkGangBarrierSync::enter() { MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); + if (should_reset()) { + // The should_reset() was set and we are the first worker to enter + // the sync barrier. We will zero the n_completed() count which + // effectively resets the barrier. + zero_completed(); + set_should_reset(false); + } inc_completed(); if (n_completed() == n_workers()) { + // At this point we would like to reset the barrier to be ready in + // case it is used again. However, we cannot set n_completed() to + // 0, even after the notify_all(), given that some other workers + // might still be waiting for n_completed() to become == + // n_workers(). So, if we set n_completed() to 0, those workers + // will get stuck (as they will wake up, see that n_completed() != + // n_workers() and go back to sleep). Instead, we raise the + // should_reset() flag and the barrier will be reset the first + // time a worker enters it again. + set_should_reset(true); monitor()->notify_all(); - } - else { + } else { while (n_completed() != n_workers()) { monitor()->wait(/* no_safepoint_check */ true); } @@ -442,3 +474,122 @@ } return false; } + +bool FreeIdSet::_stat_init = false; +FreeIdSet* FreeIdSet::_sets[NSets]; +bool FreeIdSet::_safepoint; + +FreeIdSet::FreeIdSet(int sz, Monitor* mon) : + _sz(sz), _mon(mon), _hd(0), _waiters(0), _index(-1), _claimed(0) +{ + _ids = new int[sz]; + for (int i = 0; i < sz; i++) _ids[i] = i+1; + _ids[sz-1] = end_of_list; // end of list. + if (_stat_init) { + for (int j = 0; j < NSets; j++) _sets[j] = NULL; + _stat_init = true; + } + // Add to sets. (This should happen while the system is still single-threaded.) 
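A minimal usage sketch of the barrier (the per-phase functions are assumed, not part of this changeset): every one of the gang's n_workers() threads calls enter(), which blocks until the last one arrives, and the deferred reset described above lets the same barrier be reused at the next phase boundary.

extern void do_phase_one(int worker_id);   // assumed per-worker work
extern void do_phase_two(int worker_id);

static void run_two_phases(WorkGangBarrierSync* bs, int worker_id) {
  do_phase_one(worker_id);
  bs->enter();              // blocks until all n_workers() have entered
  do_phase_two(worker_id);  // phase one is now globally complete
}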
+ for (int j = 0; j < NSets; j++) { + if (_sets[j] == NULL) { + _sets[j] = this; + _index = j; + break; + } + } + guarantee(_index != -1, "Too many FreeIdSets in use!"); +} + +FreeIdSet::~FreeIdSet() { + _sets[_index] = NULL; +} + +void FreeIdSet::set_safepoint(bool b) { + _safepoint = b; + if (b) { + for (int j = 0; j < NSets; j++) { + if (_sets[j] != NULL && _sets[j]->_waiters > 0) { + Monitor* mon = _sets[j]->_mon; + mon->lock_without_safepoint_check(); + mon->notify_all(); + mon->unlock(); + } + } + } +} + +#define FID_STATS 0 + +int FreeIdSet::claim_par_id() { +#if FID_STATS + thread_t tslf = thr_self(); + tty->print("claim_par_id[%d]: sz = %d, claimed = %d\n", tslf, _sz, _claimed); +#endif + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + while (!_safepoint && _hd == end_of_list) { + _waiters++; +#if FID_STATS + if (_waiters > 5) { + tty->print("claim_par_id waiting[%d]: %d waiters, %d claimed.\n", + tslf, _waiters, _claimed); + } +#endif + _mon->wait(Mutex::_no_safepoint_check_flag); + _waiters--; + } + if (_hd == end_of_list) { +#if FID_STATS + tty->print("claim_par_id[%d]: returning EOL.\n", tslf); +#endif + return -1; + } else { + int res = _hd; + _hd = _ids[res]; + _ids[res] = claimed; // For debugging. + _claimed++; +#if FID_STATS + tty->print("claim_par_id[%d]: returning %d, claimed = %d.\n", + tslf, res, _claimed); +#endif + return res; + } +} + +bool FreeIdSet::claim_perm_id(int i) { + assert(0 <= i && i < _sz, "Out of range."); + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + int prev = end_of_list; + int cur = _hd; + while (cur != end_of_list) { + if (cur == i) { + if (prev == end_of_list) { + _hd = _ids[cur]; + } else { + _ids[prev] = _ids[cur]; + } + _ids[cur] = claimed; + _claimed++; + return true; + } else { + prev = cur; + cur = _ids[cur]; + } + } + return false; + +} + +void FreeIdSet::release_par_id(int id) { + MutexLockerEx x(_mon, Mutex::_no_safepoint_check_flag); + assert(_ids[id] == claimed, "Precondition."); + _ids[id] = _hd; + _hd = id; + _claimed--; +#if FID_STATS + tty->print("[%d] release_par_id(%d), waiters =%d, claimed = %d.\n", + thr_self(), id, _waiters, _claimed); +#endif + if (_waiters > 0) + // Notify all would be safer, but this is OK, right? + _mon->notify_all(); +} diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/workgroup.hpp --- a/src/share/vm/utilities/workgroup.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/workgroup.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -72,7 +72,8 @@ // Here's the public interface to this class. public: // Constructor and destructor. - AbstractWorkGang(const char* name, bool are_GC_threads); + AbstractWorkGang(const char* name, bool are_GC_task_threads, + bool are_ConcurrentGC_threads); ~AbstractWorkGang(); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task) = 0; @@ -83,7 +84,8 @@ const char* name() const; protected: // Initialize only instance data. - const bool _are_GC_threads; + const bool _are_GC_task_threads; + const bool _are_ConcurrentGC_threads; // Printing support. const char* _name; // The monitor which protects these data, @@ -130,8 +132,11 @@ int finished_workers() const { return _finished_workers; } - bool are_GC_threads() const { - return _are_GC_threads; + bool are_GC_task_threads() const { + return _are_GC_task_threads; + } + bool are_ConcurrentGC_threads() const { + return _are_ConcurrentGC_threads; } // Predicates. 
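A hypothetical borrow-and-return pattern for FreeIdSet (the worker function is assumed, not part of this changeset): claim_par_id() may block until an id is released and returns -1 if a safepoint wakes the waiter up.

extern void do_work_with_id(int id);   // assumed

static bool with_par_id(FreeIdSet* ids) {
  int id = ids->claim_par_id();   // may wait; -1 means woken up by a safepoint
  if (id == -1) return false;
  do_work_with_id(id);
  ids->release_par_id(id);
  return true;
}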
bool is_idle() const { @@ -190,7 +195,8 @@ class WorkGang: public AbstractWorkGang { public: // Constructor - WorkGang(const char* name, int workers, bool are_GC_threads); + WorkGang(const char* name, int workers, + bool are_GC_task_threads, bool are_ConcurrentGC_threads); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task); }; @@ -206,6 +212,7 @@ virtual void run(); // Predicate for Thread virtual bool is_GC_task_thread() const; + virtual bool is_ConcurrentGC_thread() const; // Printing void print_on(outputStream* st) const; virtual void print() const { print_on(tty); } @@ -228,12 +235,17 @@ Monitor _monitor; int _n_workers; int _n_completed; + bool _should_reset; - Monitor* monitor() { return &_monitor; } - int n_workers() { return _n_workers; } - int n_completed() { return _n_completed; } + Monitor* monitor() { return &_monitor; } + int n_workers() { return _n_workers; } + int n_completed() { return _n_completed; } + bool should_reset() { return _should_reset; } - void inc_completed() { _n_completed++; } + void zero_completed() { _n_completed = 0; } + void inc_completed() { _n_completed++; } + + void set_should_reset(bool v) { _should_reset = v; } public: WorkGangBarrierSync(); @@ -343,3 +355,42 @@ // cleanup if necessary. bool all_tasks_completed(); }; + +// Represents a set of free small integer ids. +class FreeIdSet { + enum { + end_of_list = -1, + claimed = -2 + }; + + int _sz; + Monitor* _mon; + + int* _ids; + int _hd; + int _waiters; + int _claimed; + + static bool _safepoint; + typedef FreeIdSet* FreeIdSetPtr; + static const int NSets = 10; + static FreeIdSetPtr _sets[NSets]; + static bool _stat_init; + int _index; + +public: + FreeIdSet(int sz, Monitor* mon); + ~FreeIdSet(); + + static void set_safepoint(bool b); + + // Attempt to claim the given id permanently. Returns "true" iff + // successful. + bool claim_perm_id(int i); + + // Returns an unclaimed parallel id (waiting for one to be released if + // necessary). Returns "-1" if a GC wakes up a wait for an id. + int claim_par_id(); + + void release_par_id(int id); +}; diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/yieldingWorkgroup.cpp --- a/src/share/vm/utilities/yieldingWorkgroup.cpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/yieldingWorkgroup.cpp Thu Jun 05 15:57:56 2008 -0700 @@ -31,8 +31,8 @@ class WorkData; YieldingFlexibleWorkGang::YieldingFlexibleWorkGang( - const char* name, int workers, bool are_GC_threads) : - AbstractWorkGang(name, are_GC_threads) { + const char* name, int workers, bool are_GC_task_threads) : + AbstractWorkGang(name, are_GC_task_threads, false) { // Save arguments. _total_workers = workers; assert(_total_workers > 0, "Must have more than 1 worker"); diff -r 0b27f3512f9e -r 37f87013dfd8 src/share/vm/utilities/yieldingWorkgroup.hpp --- a/src/share/vm/utilities/yieldingWorkgroup.hpp Wed Jun 04 13:51:09 2008 -0700 +++ b/src/share/vm/utilities/yieldingWorkgroup.hpp Thu Jun 05 15:57:56 2008 -0700 @@ -143,7 +143,8 @@ // Here's the public interface to this class. public: // Constructor and destructor. - YieldingFlexibleWorkGang(const char* name, int workers, bool are_GC_threads); + YieldingFlexibleWorkGang(const char* name, int workers, + bool are_GC_task_threads); YieldingFlexibleGangTask* yielding_task() const { assert(task() == NULL || task()->is_YieldingFlexibleGang_task(),