changeset 828:b64314863098

Merge
author kvn
date Wed, 01 Jul 2009 15:06:54 -0700
parents 32c83fb84370 (diff) bf3489cc0aa0 (current diff)
children e7d5557ad624 acba6af809c8
files
diffstat 45 files changed, 713 insertions(+), 881 deletions(-)
--- a/.hgtags	Wed Jul 01 12:22:23 2009 -0700
+++ b/.hgtags	Wed Jul 01 15:06:54 2009 -0700
@@ -33,3 +33,7 @@
 a3fd9e40ff2e854f6169eb6d09d491a28634d04f jdk7-b56
 f4cbf78110c726919f46b59a3b054c54c7e889b4 jdk7-b57
 53d9bf689e80fcc76b221bbe6c5d58e08b80cbc6 jdk7-b58
+c55be0c7bd32c016c52218eb4c8b5da8a75450b5 jdk7-b59
+a77eddcd510c3972717c025cfcef9a60bfa4ecac jdk7-b60
+27b728fd1281ab62e9d7e4424f8bbb6ca438d803 jdk7-b61
+a88386380bdaaa5ab4ffbedf22c57bac5dbec034 jdk7-b62
--- a/make/README	Wed Jul 01 12:22:23 2009 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-README:
-  This file should be located at the top of the hotspot Mercurial repository.
-
-  See http://openjdk.java.net/ for more information about the OpenJDK.
-
-  See ../README-builds.html for complete details on build machine requirements.
-
-Simple Build Instructions:
-
-    cd make && gnumake
-     
-  The files that will be imported into the jdk build will be in the "build"
-  directory.
-
--- a/make/hotspot_version	Wed Jul 01 12:22:23 2009 -0700
+++ b/make/hotspot_version	Wed Jul 01 15:06:54 2009 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=16
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=03
+HS_BUILD_NUMBER=05
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -4454,43 +4454,26 @@
     delayed()->nop();
   }
 
-  // Now we decide how to generate the card table write.  If we're
-  // enqueueing, we call out to a generated function.  Otherwise, we do it
-  // inline here.
-
-  if (G1RSBarrierUseQueue) {
-    // If the "store_addr" register is an "in" or "local" register, move it to
-    // a scratch reg so we can pass it as an argument.
-    bool use_scr = !(store_addr->is_global() || store_addr->is_out());
-    // Pick a scratch register different from "tmp".
-    Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
-    // Make sure we use up the delay slot!
-    if (use_scr) {
-      post_filter_masm->mov(store_addr, scr);
-    } else {
-      post_filter_masm->nop();
-    }
-    generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
-    save_frame(0);
-    call(dirty_card_log_enqueue);
-    if (use_scr) {
-      delayed()->mov(scr, O0);
-    } else {
-      delayed()->mov(store_addr->after_save(), O0);
-    }
-    restore();
-
+  // If the "store_addr" register is an "in" or "local" register, move it to
+  // a scratch reg so we can pass it as an argument.
+  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
+  // Pick a scratch register different from "tmp".
+  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
+  // Make sure we use up the delay slot!
+  if (use_scr) {
+    post_filter_masm->mov(store_addr, scr);
   } else {
-
-#ifdef _LP64
-    post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
-#else
-    post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
-#endif
-    assert(tmp != store_addr, "need separate temp reg");
-    set(bs->byte_map_base, tmp);
-    stb(G0, tmp, store_addr);
+    post_filter_masm->nop();
   }
+  generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
+  save_frame(0);
+  call(dirty_card_log_enqueue);
+  if (use_scr) {
+    delayed()->mov(scr, O0);
+  } else {
+    delayed()->mov(store_addr->after_save(), O0);
+  }
+  restore();
 
   bind(filtered);
 
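
For readers tracing the barrier change: the removed else-branch wrote the card table directly instead of calling the enqueue stub. A minimal C++ sketch of what that inline path computes; CARD_SHIFT, DIRTY, and inline_post_barrier are hypothetical stand-ins for the CardTableModRefBS names, not HotSpot identifiers:

    #include <cstdint>

    // Hypothetical stand-ins; in HotSpot these come from CardTableModRefBS.
    static const int     CARD_SHIFT = 9;   // card_shift: 512-byte cards
    static const uint8_t DIRTY      = 0;   // the zero byte stored by "stb G0"

    // What the removed inline branch computes: the card covering store_addr
    // lives at byte_map_base[addr >> CARD_SHIFT]; store a zero byte to dirty it.
    inline void inline_post_barrier(uint8_t* byte_map_base, const void* store_addr) {
      byte_map_base[reinterpret_cast<uintptr_t>(store_addr) >> CARD_SHIFT] = DIRTY;
    }

The srlx/srl pair in the removed code is exactly the addr >> CARD_SHIFT above, in 64- and 32-bit SPARC form.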
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -555,6 +555,7 @@
   _collector_policy(cp),
   _should_unload_classes(false),
   _concurrent_cycles_since_last_unload(0),
+  _roots_scanning_options(0),
   _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
 {
   if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -545,6 +545,11 @@
   bool unloaded_classes_last_cycle() const {
     return concurrent_cycles_since_last_unload() == 0;
   }
+  // Root scanning options for perm gen
+  int _roots_scanning_options;
+  int roots_scanning_options() const      { return _roots_scanning_options; }
+  void add_root_scanning_option(int o)    { _roots_scanning_options |= o;   }
+  void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o;  }
 
   // Verification support
   CMSBitMap     _verification_mark_bm;
@@ -719,11 +724,6 @@
   NOT_PRODUCT(bool simulate_overflow();)       // sequential
   NOT_PRODUCT(bool par_simulate_overflow();)   // MT version
 
-  int _roots_scanning_options;
-  int roots_scanning_options() const      { return _roots_scanning_options; }
-  void add_root_scanning_option(int o)    { _roots_scanning_options |= o;   }
-  void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o;  }
-
   // CMS work methods
   void checkpointRootsInitialWork(bool asynch); // initial checkpoint work
 
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -25,26 +25,37 @@
 #include "incls/_precompiled.incl"
 #include "incls/_concurrentG1Refine.cpp.incl"
 
-bool ConcurrentG1Refine::_enabled = false;
-
 ConcurrentG1Refine::ConcurrentG1Refine() :
-  _pya(PYA_continue), _last_pya(PYA_continue),
-  _last_cards_during(), _first_traversal(false),
   _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
   _hot_cache(NULL),
   _def_use_cache(false), _use_cache(false),
-  _n_periods(0), _total_cards(0), _total_travs(0)
+  _n_periods(0), _total_cards(0), _total_travs(0),
+  _threads(NULL), _n_threads(0)
 {
   if (G1ConcRefine) {
-    _cg1rThread = new ConcurrentG1RefineThread(this);
-    assert(cg1rThread() != NULL, "Conc refine should have been created");
-    assert(cg1rThread()->cg1r() == this,
-           "Conc refine thread should refer to this");
-  } else {
-    _cg1rThread = NULL;
+    _n_threads = (int)thread_num();
+    if (_n_threads > 0) {
+      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
+      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
+      ConcurrentG1RefineThread *next = NULL;
+      for (int i = _n_threads - 1; i >= 0; i--) {
+        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
+        assert(t != NULL, "Conc refine should have been created");
+        assert(t->cg1r() == this, "Conc refine thread should refer to this");
+        _threads[i] = t;
+        next = t;
+      }
+    }
   }
 }
 
+size_t ConcurrentG1Refine::thread_num() {
+  if (G1ConcRefine) {
+    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
+  }
+  return 0;
+}
+
 void ConcurrentG1Refine::init() {
   if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -75,6 +86,14 @@
   }
 }
 
+void ConcurrentG1Refine::stop() {
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      _threads[i]->stop();
+    }
+  }
+}
+
 ConcurrentG1Refine::~ConcurrentG1Refine() {
   if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
     assert(_card_counts != NULL, "Logic");
@@ -88,104 +107,22 @@
     assert(_hot_cache != NULL, "Logic");
     FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
   }
-}
-
-bool ConcurrentG1Refine::refine() {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
-  clear_hot_cache();  // Any previous values in this are now invalid.
-  g1h->g1_rem_set()->concurrentRefinementPass(this);
-  _traversals++;
-  unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
-  unsigned cards_during = cards_after-cards_before;
-  // If this is the first traversal in the current enabling
-  // and we did some cards, or if the number of cards found is decreasing
-  // sufficiently quickly, then keep going.  Otherwise, sleep a while.
-  bool res =
-    (_first_traversal && cards_during > 0)
-    ||
-    (!_first_traversal && cards_during * 3 < _last_cards_during * 2);
-  _last_cards_during = cards_during;
-  _first_traversal = false;
-  return res;
-}
-
-void ConcurrentG1Refine::enable() {
-  MutexLocker x(G1ConcRefine_mon);
-  if (!_enabled) {
-    _enabled = true;
-    _first_traversal = true; _last_cards_during = 0;
-    G1ConcRefine_mon->notify_all();
-  }
-}
-
-unsigned ConcurrentG1Refine::disable() {
-  MutexLocker x(G1ConcRefine_mon);
-  if (_enabled) {
-    _enabled = false;
-    return _traversals;
-  } else {
-    return 0;
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      delete _threads[i];
+    }
+    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
   }
 }
 
-void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
-  G1ConcRefine_mon->lock();
-  while (!_enabled) {
-    G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
-  }
-  G1ConcRefine_mon->unlock();
-  _traversals = 0;
-};
-
-void ConcurrentG1Refine::set_pya_restart() {
-  // If we're using the log-based RS barrier, the above will cause
-  // in-progress traversals of completed log buffers to quit early; we will
-  // also abandon all other buffers.
-  if (G1RSBarrierUseQueue) {
-    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-    dcqs.abandon_logs();
-    // Reset the post-yield actions.
-    _pya = PYA_continue;
-    _last_pya = PYA_continue;
-  } else {
-    _pya = PYA_restart;
+void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      tc->do_thread(_threads[i]);
+    }
   }
 }
 
-void ConcurrentG1Refine::set_pya_cancel() {
-  _pya = PYA_cancel;
-}
-
-PostYieldAction ConcurrentG1Refine::get_pya() {
-  if (_pya != PYA_continue) {
-    jint val = _pya;
-    while (true) {
-      jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
-      if (val_read == val) {
-        PostYieldAction res = (PostYieldAction)val;
-        assert(res != PYA_continue, "Only the refine thread should reset.");
-        _last_pya = res;
-        return res;
-      } else {
-        val = val_read;
-      }
-    }
-  }
-  // QQQ WELL WHAT DO WE RETURN HERE???
-  // make up something!
-  return PYA_continue;
-}
-
-PostYieldAction ConcurrentG1Refine::get_last_pya() {
-  PostYieldAction res = _last_pya;
-  _last_pya = PYA_continue;
-  return res;
-}
-
-bool ConcurrentG1Refine::do_traversal() {
-  return _cg1rThread->do_traversal();
-}
 
 int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
   size_t card_num = (card_ptr - _ct_bot);
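
The constructor above builds the worker array from the highest index down, so each thread can be handed a pointer to its already-constructed successor, and the last worker ends up with next == NULL. The same construction order in isolation, with Worker as a hypothetical stand-in for ConcurrentG1RefineThread:

    #include <cstddef>

    struct Worker {
      Worker* _next;
      int     _id;
      Worker(Worker* next, int id) : _next(next), _id(id) {}
    };

    // Build n workers so worker i can later activate worker i+1. Iterating in
    // reverse lets each constructor receive its successor directly.
    Worker** make_chain(int n) {
      Worker** workers = new Worker*[n];
      Worker* next = NULL;
      for (int i = n - 1; i >= 0; i--) {
        workers[i] = new Worker(next, i);
        next = workers[i];
      }
      return workers;  // workers[n-1]->_next == NULL terminates the chain
    }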
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -26,26 +26,9 @@
 class ConcurrentG1RefineThread;
 class G1RemSet;
 
-// What to do after a yield:
-enum PostYieldAction {
-  PYA_continue,  // Continue the traversal
-  PYA_restart,   // Restart
-  PYA_cancel     // It's been completed by somebody else: cancel.
-};
-
 class ConcurrentG1Refine: public CHeapObj {
-  ConcurrentG1RefineThread* _cg1rThread;
-
-  volatile jint _pya;
-  PostYieldAction _last_pya;
-
-  static bool _enabled;  // Protected by G1ConcRefine_mon.
-  unsigned _traversals;
-
-  // Number of cards processed during last refinement traversal.
-  unsigned _first_traversal;
-  unsigned _last_cards_during;
-
+  ConcurrentG1RefineThread** _threads;
+  int _n_threads;
   // The cache for card refinement.
   bool     _use_cache;
   bool     _def_use_cache;
@@ -74,37 +57,10 @@
   ~ConcurrentG1Refine();
 
   void init(); // Accomplish some initialization that has to wait.
-
-  // Enabled Conc refinement, waking up thread if necessary.
-  void enable();
-
-  // Returns the number of traversals performed since this refiner was enabled.
-  unsigned disable();
-
-  // Requires G1ConcRefine_mon to be held.
-  bool enabled() { return _enabled; }
-
-  // Returns only when G1 concurrent refinement has been enabled.
-  void wait_for_ConcurrentG1Refine_enabled();
+  void stop();
 
-  // Do one concurrent refinement pass over the card table.  Returns "true"
-  // if heuristics determine that another pass should be done immediately.
-  bool refine();
-
-  // Indicate that an in-progress refinement pass should start over.
-  void set_pya_restart();
-  // Indicate that an in-progress refinement pass should quit.
-  void set_pya_cancel();
-
-  // Get the appropriate post-yield action.  Also sets last_pya.
-  PostYieldAction get_pya();
-
-  // The last PYA read by "get_pya".
-  PostYieldAction get_last_pya();
-
-  bool do_traversal();
-
-  ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
+  // Iterate over the conc refine threads
+  void threads_do(ThreadClosure *tc);
 
   // If this is the first entry for the slot, writes into the cache and
   // returns NULL.  If it causes an eviction, returns the evicted pointer.
@@ -129,4 +85,6 @@
 
   void clear_and_record_card_counts();
   void print_final_card_counts();
+
+  static size_t thread_num();
 };
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -30,12 +30,14 @@
 // The CM thread is created when the G1 garbage collector is used
 
 ConcurrentG1RefineThread::
-ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
+ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
+                         int worker_id_offset, int worker_id) :
   ConcurrentGCThread(),
+  _worker_id_offset(worker_id_offset),
+  _worker_id(worker_id),
+  _active(false),
+  _next(next),
   _cg1r(cg1r),
-  _started(false),
-  _in_progress(false),
-  _do_traversal(false),
   _vtime_accum(0.0),
   _co_tracker(G1CRGroup),
   _interval_ms(5.0)
@@ -43,112 +45,6 @@
   create_and_start();
 }
 
-const long timeout = 200; // ms.
-
-void ConcurrentG1RefineThread::traversalBasedRefinement() {
-  _cg1r->wait_for_ConcurrentG1Refine_enabled();
-  MutexLocker x(G1ConcRefine_mon);
-  while (_cg1r->enabled()) {
-    MutexUnlocker ux(G1ConcRefine_mon);
-    ResourceMark rm;
-    HandleMark   hm;
-
-    if (G1TraceConcurrentRefinement) {
-      gclog_or_tty->print_cr("G1-Refine starting pass");
-    }
-    _sts.join();
-    bool no_sleep = _cg1r->refine();
-    _sts.leave();
-    if (!no_sleep) {
-      MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
-      // We do this only for the timeout; we don't expect this to be signalled.
-      CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
-    }
-  }
-}
-
-void ConcurrentG1RefineThread::queueBasedRefinement() {
-  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  // Wait for completed log buffers to exist.
-  {
-    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
-    while (!_do_traversal && !dcqs.process_completed_buffers() &&
-           !_should_terminate) {
-      DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
-    }
-  }
-
-  if (_should_terminate) {
-    return;
-  }
-
-  // Now we take them off (this doesn't hold locks while it applies
-  // closures.)  (If we did a full collection, then we'll do a full
-  // traversal.
-  _sts.join();
-  if (_do_traversal) {
-    (void)_cg1r->refine();
-    switch (_cg1r->get_last_pya()) {
-    case PYA_cancel: case PYA_continue:
-      // Continue was caught and handled inside "refine".  If it's still
-      // "continue" when we get here, we're done.
-      _do_traversal = false;
-      break;
-    case PYA_restart:
-      assert(_do_traversal, "Because of Full GC.");
-      break;
-    }
-  } else {
-    int n_logs = 0;
-    int lower_limit = 0;
-    double start_vtime_sec; // only used when G1SmoothConcRefine is on
-    int prev_buffer_num; // only used when G1SmoothConcRefine is on
-
-    if (G1SmoothConcRefine) {
-      lower_limit = 0;
-      start_vtime_sec = os::elapsedVTime();
-      prev_buffer_num = (int) dcqs.completed_buffers_num();
-    } else {
-      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
-    }
-    while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
-      double end_vtime_sec;
-      double elapsed_vtime_sec;
-      int elapsed_vtime_ms;
-      int curr_buffer_num;
-
-      if (G1SmoothConcRefine) {
-        end_vtime_sec = os::elapsedVTime();
-        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
-        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
-        curr_buffer_num = (int) dcqs.completed_buffers_num();
-
-        if (curr_buffer_num > prev_buffer_num ||
-            curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
-          decreaseInterval(elapsed_vtime_ms);
-        } else if (curr_buffer_num < prev_buffer_num) {
-          increaseInterval(elapsed_vtime_ms);
-        }
-      }
-
-      sample_young_list_rs_lengths();
-      _co_tracker.update(false);
-
-      if (G1SmoothConcRefine) {
-        prev_buffer_num = curr_buffer_num;
-        _sts.leave();
-        os::sleep(Thread::current(), (jlong) _interval_ms, false);
-        _sts.join();
-        start_vtime_sec = os::elapsedVTime();
-      }
-      n_logs++;
-    }
-    // Make sure we harvest the PYA, if any.
-    (void)_cg1r->get_pya();
-  }
-  _sts.leave();
-}
-
 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
@@ -184,15 +80,97 @@
   _co_tracker.start();
 
   while (!_should_terminate) {
-    // wait until started is set.
-    if (G1RSBarrierUseQueue) {
-      queueBasedRefinement();
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    // Wait for completed log buffers to exist.
+    {
+      MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+      while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
+              (_worker_id > 0 && !is_active())) &&
+             !_should_terminate) {
+         DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
+      }
+    }
+
+    if (_should_terminate) {
+      return;
+    }
+
+    // Now we take them off (this doesn't hold locks while it applies
+    // closures). (If we did a full collection, then we'll do a full
+    // traversal.)
+    _sts.join();
+    int n_logs = 0;
+    int lower_limit = 0;
+    double start_vtime_sec; // only used when G1SmoothConcRefine is on
+    int prev_buffer_num; // only used when G1SmoothConcRefine is on
+    // This thread's activation threshold
+    int threshold = DCQBarrierProcessCompletedThreshold * _worker_id;
+    // Next thread's activation threshold
+    int next_threshold = threshold + DCQBarrierProcessCompletedThreshold;
+    int deactivation_threshold = MAX2<int>(threshold - DCQBarrierProcessCompletedThreshold / 2, 0);
+
+    if (G1SmoothConcRefine) {
+      lower_limit = 0;
+      start_vtime_sec = os::elapsedVTime();
+      prev_buffer_num = (int) dcqs.completed_buffers_num();
     } else {
-      traversalBasedRefinement();
+      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
     }
-    _sts.join();
-    _co_tracker.update();
+    while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
+      double end_vtime_sec;
+      double elapsed_vtime_sec;
+      int elapsed_vtime_ms;
+      int curr_buffer_num = (int) dcqs.completed_buffers_num();
+
+      if (G1SmoothConcRefine) {
+        end_vtime_sec = os::elapsedVTime();
+        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
+
+        if (curr_buffer_num > prev_buffer_num ||
+            curr_buffer_num > next_threshold) {
+          decreaseInterval(elapsed_vtime_ms);
+        } else if (curr_buffer_num < prev_buffer_num) {
+          increaseInterval(elapsed_vtime_ms);
+        }
+      }
+      if (_worker_id == 0) {
+        sample_young_list_rs_lengths();
+      } else if (curr_buffer_num < deactivation_threshold) {
+        // If the number of buffers has fallen below our threshold,
+        // we should deactivate. The predecessor will reactivate this
+        // thread if the number of buffers crosses the threshold again.
+        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+        deactivate();
+        if (G1TraceConcurrentRefinement) {
+          gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
+        }
+        break;
+      }
+      _co_tracker.update(false);
+
+      // Check if we need to activate the next thread.
+      if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
+        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+        _next->activate();
+        DirtyCardQ_CBL_mon->notify_all();
+        if (G1TraceConcurrentRefinement) {
+          gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
+        }
+      }
+
+      if (G1SmoothConcRefine) {
+        prev_buffer_num = curr_buffer_num;
+        _sts.leave();
+        os::sleep(Thread::current(), (jlong) _interval_ms, false);
+        _sts.join();
+        start_vtime_sec = os::elapsedVTime();
+      }
+      n_logs++;
+    }
+    _co_tracker.update(false);
     _sts.leave();
+
     if (os::supports_vtime()) {
       _vtime_accum = (os::elapsedVTime() - _vtime_start);
     } else {
@@ -240,7 +218,3 @@
   Thread::print();
   gclog_or_tty->cr();
 }
-
-void ConcurrentG1RefineThread::set_do_traversal(bool b) {
-  _do_traversal = b;
-}
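
To make the tiering concrete, here is a worked example of the per-worker thresholds computed in run() above, assuming a hypothetical DCQBarrierProcessCompletedThreshold of 8 (the real value is a HotSpot flag):

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int DCQBarrierProcessCompletedThreshold = 8;  // assumed value
      for (int worker_id = 0; worker_id < 3; worker_id++) {
        int threshold      = DCQBarrierProcessCompletedThreshold * worker_id;
        int next_threshold = threshold + DCQBarrierProcessCompletedThreshold;
        int deactivation   = std::max(threshold - DCQBarrierProcessCompletedThreshold / 2, 0);
        // worker 0: runs whenever buffers exist, wakes its successor above 8,
        //           never self-deactivates (it samples young list lengths instead)
        // worker 1: activated at 8,  wakes successor above 16, deactivates below 4
        // worker 2: activated at 16, wakes successor above 24, deactivates below 12
        std::printf("worker %d: threshold=%d next=%d deactivate<%d\n",
                    worker_id, threshold, next_threshold, deactivation);
      }
      return 0;
    }

The half-threshold gap between activation and deactivation gives each worker hysteresis, so a buffer count hovering near a threshold does not make threads flap on and off.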
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -33,21 +33,27 @@
 
   double _vtime_start;  // Initial virtual time.
   double _vtime_accum;  // Accumulated virtual time.
+  int _worker_id;
+  int _worker_id_offset;
 
+  // The refinement threads form a linked list. A predecessor can activate a successor
+  // when the number of RSet update buffers crosses a certain threshold. A successor
+  // self-deactivates when the number of buffers falls below the threshold.
+  bool _active;
+  ConcurrentG1RefineThread *       _next;
  public:
   virtual void run();
 
+  bool is_active()  { return _active;  }
+  void activate()   { _active = true;  }
+  void deactivate() { _active = false; }
+
  private:
   ConcurrentG1Refine*              _cg1r;
-  bool                             _started;
-  bool                             _in_progress;
-  volatile bool                    _restart;
 
   COTracker                        _co_tracker;
   double                           _interval_ms;
 
-  bool                             _do_traversal;
-
   void decreaseInterval(int processing_time_ms) {
     double min_interval_ms = (double) processing_time_ms;
     _interval_ms = 0.8 * _interval_ms;
@@ -63,16 +69,13 @@
 
   void sleepBeforeNextCycle();
 
-  void traversalBasedRefinement();
-
-  void queueBasedRefinement();
-
   // For use by G1CollectedHeap, which is a friend.
   static SuspendibleThreadSet* sts() { return &_sts; }
 
  public:
   // Constructor
-  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
+  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
+                           int worker_id_offset, int worker_id);
 
   // Printing
   void print();
@@ -82,23 +85,11 @@
 
   ConcurrentG1Refine* cg1r()                     { return _cg1r;     }
 
-
-  void            set_started()                  { _started = true;   }
-  void            clear_started()                { _started = false;  }
-  bool            started()                      { return _started;   }
-
-  void            set_in_progress()              { _in_progress = true;   }
-  void            clear_in_progress()            { _in_progress = false;  }
-  bool            in_progress()                  { return _in_progress;   }
-
-  void            set_do_traversal(bool b);
-  bool            do_traversal() { return _do_traversal; }
-
   void            sample_young_list_rs_lengths();
 
   // Yield for GC
   void            yield();
 
   // shutdown
-  static void stop();
+  void stop();
 };
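
A minimal sketch of the activate/deactivate handshake the comment above describes; std::mutex and std::condition_variable stand in for the shared DirtyCardQ_CBL_mon monitor, and RefineWorker is a hypothetical type, not the HotSpot class:

    #include <condition_variable>
    #include <mutex>

    struct RefineWorker {
      bool                    _active = false;
      RefineWorker*           _next   = nullptr;
      std::mutex              _mon;   // per-worker here; shared in HotSpot
      std::condition_variable _cv;

      // Successor side: block until a predecessor activates us.
      void wait_until_active() {
        std::unique_lock<std::mutex> l(_mon);
        _cv.wait(l, [this] { return _active; });
      }

      // Predecessor side: wake the next worker once the backlog is deep enough.
      // The unlocked _active pre-check is racy by design, as in the original.
      void maybe_wake_successor(int buffers, int next_threshold) {
        if (buffers > next_threshold && _next != nullptr && !_next->_active) {
          std::lock_guard<std::mutex> l(_next->_mon);
          _next->_active = true;
          _next->_cv.notify_all();
        }
      }
    };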
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -1157,6 +1157,13 @@
   } else {
     // We're done with marking.
     JavaThread::satb_mark_queue_set().set_active_all_threads(false);
+
+    if (VerifyDuringGC) {
+      g1h->prepare_for_verify();
+      g1h->verify(/* allow_dirty */      true,
+                  /* silent */           false,
+                  /* use_prev_marking */ false);
+    }
   }
 
 #if VERIFY_OBJS_PROCESSED
@@ -1747,12 +1754,12 @@
   // races with it goes around and waits for completeCleanup to finish.
   g1h->increment_total_collections();
 
-#ifndef PRODUCT
   if (VerifyDuringGC) {
-    G1CollectedHeap::heap()->prepare_for_verify();
-    G1CollectedHeap::heap()->verify(true,false);
+    g1h->prepare_for_verify();
+    g1h->verify(/* allow_dirty */      true,
+                /* silent */           false,
+                /* use_prev_marking */ true);
   }
-#endif
 }
 
 void ConcurrentMark::completeCleanup() {
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -80,5 +80,5 @@
   void            yield();
 
   // shutdown
-  static void stop();
+  void stop();
 };
--- a/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -73,7 +73,7 @@
   // while holding the ZF_needed_mon lock.
 
   // shutdown
-  static void stop();
+  void stop();
 
   // Stats
   static void note_region_alloc() {_region_allocs++; }
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -71,11 +71,11 @@
   _all_active = true;
 }
 
+// Determines how many mutator threads can process the buffers in parallel.
 size_t DirtyCardQueueSet::num_par_ids() {
-  return MAX2(ParallelGCThreads, (size_t)2);
+  return os::processor_count();
 }
 
-
 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                    int max_completed_queue,
                                    Mutex* lock, PtrQueueSet* fl_owner) {
@@ -85,8 +85,6 @@
 
   _shared_dirty_card_queue.set_lock(lock);
   _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
-  bool b = _free_ids->claim_perm_id(0);
-  guarantee(b, "Must reserve id zero for concurrent refinement thread.");
 }
 
 void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
@@ -234,7 +232,7 @@
     nd = get_completed_buffer_lock(stop_at);
   }
   bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
-  if (res) _processed_buffers_rs_thread++;
+  if (res) Atomic::inc(&_processed_buffers_rs_thread);
   return res;
 }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -446,8 +446,61 @@
   gclog_or_tty->print_cr("");
 }
 
+void G1CollectedHeap::push_dirty_cards_region(HeapRegion* hr)
+{
+  // Claim the right to put the region on the dirty cards region list
+  // by installing a self pointer.
+  HeapRegion* next = hr->get_next_dirty_cards_region();
+  if (next == NULL) {
+    HeapRegion* res = (HeapRegion*)
+      Atomic::cmpxchg_ptr(hr, hr->next_dirty_cards_region_addr(),
+                          NULL);
+    if (res == NULL) {
+      HeapRegion* head;
+      do {
+        // Put the region on the dirty cards region list.
+        head = _dirty_cards_region_list;
+        next = (HeapRegion*)
+          Atomic::cmpxchg_ptr(hr, &_dirty_cards_region_list, head);
+        if (next == head) {
+          assert(hr->get_next_dirty_cards_region() == hr,
+                 "hr->get_next_dirty_cards_region() != hr");
+          if (next == NULL) {
+            // The last region in the list points to itself.
+            hr->set_next_dirty_cards_region(hr);
+          } else {
+            hr->set_next_dirty_cards_region(next);
+          }
+        }
+      } while (next != head);
+    }
+  }
+}
+
+HeapRegion* G1CollectedHeap::pop_dirty_cards_region()
+{
+  HeapRegion* head;
+  HeapRegion* hr;
+  do {
+    head = _dirty_cards_region_list;
+    if (head == NULL) {
+      return NULL;
+    }
+    HeapRegion* new_head = head->get_next_dirty_cards_region();
+    if (head == new_head) {
+      // The last region.
+      new_head = NULL;
+    }
+    hr = (HeapRegion*)Atomic::cmpxchg_ptr(new_head, &_dirty_cards_region_list,
+                                          head);
+  } while (hr != head);
+  assert(hr != NULL, "invariant");
+  hr->set_next_dirty_cards_region(NULL);
+  return hr;
+}
+
 void G1CollectedHeap::stop_conc_gc_threads() {
-  _cg1r->cg1rThread()->stop();
+  _cg1r->stop();
   _czft->stop();
   _cmThread->stop();
 }
@@ -1001,12 +1054,8 @@
 
     gc_epilogue(true);
 
-    // Abandon concurrent refinement.  This must happen last: in the
-    // dirty-card logging system, some cards may be dirty by weak-ref
-    // processing, and may be enqueued.  But the whole card table is
-    // dirtied, so this should abandon those logs, and set "do_traversal"
-    // to true.
-    concurrent_g1_refine()->set_pya_restart();
+    // Discard all rset updates
+    JavaThread::dirty_card_queue_set().abandon_logs();
     assert(!G1DeferredRSUpdate
            || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any");
     assert(regions_accounted_for(), "Region leakage!");
@@ -1333,7 +1382,8 @@
   _gc_time_stamp(0),
   _surviving_young_words(NULL),
   _in_cset_fast_test(NULL),
-  _in_cset_fast_test_base(NULL) {
+  _in_cset_fast_test_base(NULL),
+  _dirty_cards_region_list(NULL) {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
@@ -1485,6 +1535,15 @@
   guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq");
   guarantee(_cur_alloc_region == NULL, "from constructor");
 
+  // 6843694 - ensure that the maximum region index can fit
+  // in the remembered set structures.
+  const size_t max_region_idx = ((size_t)1 << (sizeof(RegionIdx_t)*BitsPerByte-1)) - 1;
+  guarantee((max_regions() - 1) <= max_region_idx, "too many regions");
+
+  const size_t cards_per_region = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift;
+  size_t max_cards_per_region = ((size_t)1 << (sizeof(CardIdx_t)*BitsPerByte-1)) - 1;
+  guarantee(cards_per_region < max_cards_per_region, "too many cards per region");
+
   _bot_shared = new G1BlockOffsetSharedArray(_reserved,
                                              heap_word_size(init_byte_size));
 
@@ -1521,12 +1580,12 @@
                                                SATB_Q_FL_lock,
                                                0,
                                                Shared_SATB_Q_lock);
-  if (G1RSBarrierUseQueue) {
-    JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
-                                                  DirtyCardQ_FL_lock,
-                                                  G1DirtyCardQueueMax,
-                                                  Shared_DirtyCardQ_lock);
-  }
+
+  JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                                DirtyCardQ_FL_lock,
+                                                G1DirtyCardQueueMax,
+                                                Shared_DirtyCardQ_lock);
+
   if (G1DeferredRSUpdate) {
     dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
                                       DirtyCardQ_FL_lock,
@@ -2077,17 +2136,22 @@
 };
 
 class VerifyObjsInRegionClosure: public ObjectClosure {
+private:
   G1CollectedHeap* _g1h;
   size_t _live_bytes;
   HeapRegion *_hr;
+  bool _use_prev_marking;
 public:
-  VerifyObjsInRegionClosure(HeapRegion *hr) : _live_bytes(0), _hr(hr) {
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  VerifyObjsInRegionClosure(HeapRegion *hr, bool use_prev_marking)
+    : _live_bytes(0), _hr(hr), _use_prev_marking(use_prev_marking) {
     _g1h = G1CollectedHeap::heap();
   }
   void do_object(oop o) {
     VerifyLivenessOopClosure isLive(_g1h);
     assert(o != NULL, "Huh?");
-    if (!_g1h->is_obj_dead(o)) {
+    if (!_g1h->is_obj_dead_cond(o, _use_prev_marking)) {
       o->oop_iterate(&isLive);
       if (!_hr->obj_allocated_since_prev_marking(o))
         _live_bytes += (o->size() * HeapWordSize);
@@ -2126,17 +2190,22 @@
 };
 
 class VerifyRegionClosure: public HeapRegionClosure {
-public:
+private:
   bool _allow_dirty;
   bool _par;
-  VerifyRegionClosure(bool allow_dirty, bool par = false)
-    : _allow_dirty(allow_dirty), _par(par) {}
+  bool _use_prev_marking;
+public:
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  VerifyRegionClosure(bool allow_dirty, bool par, bool use_prev_marking)
+    : _allow_dirty(allow_dirty), _par(par),
+      _use_prev_marking(use_prev_marking) {}
   bool doHeapRegion(HeapRegion* r) {
     guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
               "Should be unclaimed at verify points.");
     if (!r->continuesHumongous()) {
-      VerifyObjsInRegionClosure not_dead_yet_cl(r);
-      r->verify(_allow_dirty);
+      VerifyObjsInRegionClosure not_dead_yet_cl(r, _use_prev_marking);
+      r->verify(_allow_dirty, _use_prev_marking);
       r->object_iterate(&not_dead_yet_cl);
       guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(),
                 "More live objects than counted in last complete marking.");
@@ -2149,10 +2218,13 @@
 private:
   G1CollectedHeap* _g1h;
   bool             _failures;
-
+  bool             _use_prev_marking;
 public:
-  VerifyRootsClosure() :
-    _g1h(G1CollectedHeap::heap()), _failures(false) { }
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  VerifyRootsClosure(bool use_prev_marking) :
+    _g1h(G1CollectedHeap::heap()), _failures(false),
+    _use_prev_marking(use_prev_marking) { }
 
   bool failures() { return _failures; }
 
@@ -2163,7 +2235,7 @@
   void do_oop(oop* p) {
     oop obj = *p;
     if (obj != NULL) {
-      if (_g1h->is_obj_dead(obj)) {
+      if (_g1h->is_obj_dead_cond(obj, _use_prev_marking)) {
         gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
                                "points to dead obj "PTR_FORMAT, p, (void*) obj);
         obj->print_on(gclog_or_tty);
@@ -2179,24 +2251,35 @@
 private:
   G1CollectedHeap* _g1h;
   bool _allow_dirty;
+  bool _use_prev_marking;
 
 public:
-  G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty) :
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty,
+                  bool use_prev_marking) :
     AbstractGangTask("Parallel verify task"),
-    _g1h(g1h), _allow_dirty(allow_dirty) { }
+    _g1h(g1h), _allow_dirty(allow_dirty),
+    _use_prev_marking(use_prev_marking) { }
 
   void work(int worker_i) {
     HandleMark hm;
-    VerifyRegionClosure blk(_allow_dirty, true);
+    VerifyRegionClosure blk(_allow_dirty, true, _use_prev_marking);
     _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
                                           HeapRegion::ParVerifyClaimValue);
   }
 };
 
 void G1CollectedHeap::verify(bool allow_dirty, bool silent) {
+  verify(allow_dirty, silent, /* use_prev_marking */ true);
+}
+
+void G1CollectedHeap::verify(bool allow_dirty,
+                             bool silent,
+                             bool use_prev_marking) {
   if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
     if (!silent) { gclog_or_tty->print("roots "); }
-    VerifyRootsClosure rootsCl;
+    VerifyRootsClosure rootsCl(use_prev_marking);
     process_strong_roots(false,
                          SharedHeap::SO_AllClasses,
                          &rootsCl,
@@ -2207,7 +2290,7 @@
       assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
              "sanity check");
 
-      G1ParVerifyTask task(this, allow_dirty);
+      G1ParVerifyTask task(this, allow_dirty, use_prev_marking);
       int n_workers = workers()->total_workers();
       set_par_threads(n_workers);
       workers()->run_task(&task);
@@ -2221,7 +2304,7 @@
       assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
              "sanity check");
     } else {
-      VerifyRegionClosure blk(allow_dirty);
+      VerifyRegionClosure blk(allow_dirty, false, use_prev_marking);
       _hrs->iterate(&blk);
     }
     if (!silent) gclog_or_tty->print("remset ");
@@ -2249,6 +2332,15 @@
   _hrs->iterate(&blk);
 }
 
+class PrintOnThreadsClosure : public ThreadClosure {
+  outputStream* _st;
+public:
+  PrintOnThreadsClosure(outputStream* st) : _st(st) { }
+  virtual void do_thread(Thread *t) {
+    t->print_on(_st);
+  }
+};
+
 void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
   if (ParallelGCThreads > 0) {
     workers()->print_worker_threads();
@@ -2256,8 +2348,9 @@
   st->print("\"G1 concurrent mark GC Thread\" ");
   _cmThread->print();
   st->cr();
-  st->print("\"G1 concurrent refinement GC Thread\" ");
-  _cg1r->cg1rThread()->print_on(st);
+  st->print("\"G1 concurrent refinement GC Threads\" ");
+  PrintOnThreadsClosure p(st);
+  _cg1r->threads_do(&p);
   st->cr();
   st->print("\"G1 zero-fill GC Thread\" ");
   _czft->print_on(st);
@@ -2269,7 +2362,7 @@
     workers()->threads_do(tc);
   }
   tc->do_thread(_cmThread);
-  tc->do_thread(_cg1r->cg1rThread());
+  _cg1r->threads_do(tc);
   tc->do_thread(_czft);
 }
 
@@ -4685,15 +4778,58 @@
   }
 }
 
+
+class G1ParCleanupCTTask : public AbstractGangTask {
+  CardTableModRefBS* _ct_bs;
+  G1CollectedHeap* _g1h;
+public:
+  G1ParCleanupCTTask(CardTableModRefBS* ct_bs,
+                     G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Par Cleanup CT Task"),
+    _ct_bs(ct_bs),
+    _g1h(g1h)
+  { }
+
+  void work(int i) {
+    HeapRegion* r;
+    while ((r = _g1h->pop_dirty_cards_region()) != NULL) {
+      clear_cards(r);
+    }
+  }
+  void clear_cards(HeapRegion* r) {
+    // Cards for Survivor and Scan-Only regions will be dirtied later.
+    if (!r->is_scan_only() && !r->is_survivor()) {
+      _ct_bs->clear(MemRegion(r->bottom(), r->end()));
+    }
+  }
+};
+
+
 void G1CollectedHeap::cleanUpCardTable() {
   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
   double start = os::elapsedTime();
 
-  ct_bs->clear(_g1_committed);
-
+  // Iterate over the dirty cards region list.
+  G1ParCleanupCTTask cleanup_task(ct_bs, this);
+  if (ParallelGCThreads > 0) {
+    set_par_threads(workers()->total_workers());
+    workers()->run_task(&cleanup_task);
+    set_par_threads(0);
+  } else {
+    while (_dirty_cards_region_list) {
+      HeapRegion* r = _dirty_cards_region_list;
+      cleanup_task.clear_cards(r);
+      _dirty_cards_region_list = r->get_next_dirty_cards_region();
+      if (_dirty_cards_region_list == r) {
+        // The last region.
+        _dirty_cards_region_list = NULL;
+      }
+      r->set_next_dirty_cards_region(NULL);
+    }
+  }
   // now, redirty the cards of the scan-only and survivor regions
   // (it seemed faster to do it this way, instead of iterating over
-  // all regions and then clearing / dirtying as approprite)
+  // all regions and then clearing / dirtying as appropriate)
   dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region());
   dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region());
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -59,6 +59,9 @@
 typedef GenericTaskQueue<oop*>    RefToScanQueue;
 typedef GenericTaskQueueSet<oop*> RefToScanQueueSet;
 
+typedef int RegionIdx_t;   // needs to hold [ 0..max_regions() )
+typedef int CardIdx_t;     // needs to hold [ 0..CardsPerRegion )
+
 enum G1GCThreadGroups {
   G1CRGroup = 0,
   G1ZFGroup = 1,
@@ -158,6 +161,7 @@
   friend class RegionSorter;
   friend class CountRCClosure;
   friend class EvacPopObjClosure;
+  friend class G1ParCleanupCTTask;
 
   // Other related classes.
   friend class G1MarkSweep;
@@ -1045,6 +1049,17 @@
   virtual void prepare_for_verify();
 
   // Perform verification.
+
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  // NOTE: Only the "prev" marking information is guaranteed to be
+  // consistent most of the time, so most calls to this should use
+  // use_prev_marking == true. Currently, there is only one case where
+  // this is called with use_prev_marking == false, which is to verify
+  // the "next" marking information at the end of remark.
+  void verify(bool allow_dirty, bool silent, bool use_prev_marking);
+
+  // Override; it uses the "prev" marking information
   virtual void verify(bool allow_dirty, bool silent);
   virtual void print() const;
   virtual void print_on(outputStream* st) const;
@@ -1121,6 +1136,18 @@
   bool isMarkedPrev(oop obj) const;
   bool isMarkedNext(oop obj) const;
 
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  bool is_obj_dead_cond(const oop obj,
+                        const HeapRegion* hr,
+                        const bool use_prev_marking) const {
+    if (use_prev_marking) {
+      return is_obj_dead(obj, hr);
+    } else {
+      return is_obj_ill(obj, hr);
+    }
+  }
+
   // Determine if an object is dead, given the object and also
   // the region to which the object belongs. An object is dead
   // iff a) it was not allocated since the last mark and b) it
@@ -1158,8 +1185,19 @@
   // Added if it is in permanent gen it isn't dead.
   // Added if it is NULL it isn't dead.
 
-  bool is_obj_dead(oop obj) {
-    HeapRegion* hr = heap_region_containing(obj);
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  bool is_obj_dead_cond(const oop obj,
+                        const bool use_prev_marking) {
+    if (use_prev_marking) {
+      return is_obj_dead(obj);
+    } else {
+      return is_obj_ill(obj);
+    }
+  }
+
+  bool is_obj_dead(const oop obj) {
+    const HeapRegion* hr = heap_region_containing(obj);
     if (hr == NULL) {
       if (Universe::heap()->is_in_permanent(obj))
         return false;
@@ -1169,8 +1207,8 @@
     else return is_obj_dead(obj, hr);
   }
 
-  bool is_obj_ill(oop obj) {
-    HeapRegion* hr = heap_region_containing(obj);
+  bool is_obj_ill(const oop obj) {
+    const HeapRegion* hr = heap_region_containing(obj);
     if (hr == NULL) {
       if (Universe::heap()->is_in_permanent(obj))
         return false;
@@ -1191,6 +1229,16 @@
   ConcurrentMark* concurrent_mark() const { return _cm; }
   ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
 
+  // The dirty cards region list is used to record a subset of regions
+  // whose cards need clearing. The list is populated during the
+  // remembered set scanning and drained during the card table
+  // cleanup. Although the methods are reentrant, population/draining
+  // phases must not overlap. For synchronization purposes the last
+  // element on the list points to itself.
+  HeapRegion* _dirty_cards_region_list;
+  void push_dirty_cards_region(HeapRegion* hr);
+  HeapRegion* pop_dirty_cards_region();
+
 public:
   void stop_conc_gc_threads();
 
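
The RegionIdx_t and CardIdx_t typedefs above bound what the remembered-set structures can index; the guarantee added in the 6843694 hunk of g1CollectedHeap.cpp checks the region count against the largest value a signed RegionIdx_t can hold. A one-line check of that arithmetic, assuming a 4-byte int:

    #include <cassert>
    #include <cstddef>

    typedef int RegionIdx_t;
    const int BitsPerByte = 8;

    int main() {
      // Mirrors the guarantee: the maximum value of a signed RegionIdx_t is
      // 2^(bits-1) - 1, i.e. 2^31 - 1 = 2147483647 for a 4-byte int.
      const size_t max_region_idx =
          ((size_t)1 << (sizeof(RegionIdx_t) * BitsPerByte - 1)) - 1;
      assert(max_region_idx == 2147483647u);
      return 0;
    }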
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -167,11 +167,6 @@
 
   _all_full_gc_times_ms(new NumberSeq()),
 
-  _conc_refine_enabled(0),
-  _conc_refine_zero_traversals(0),
-  _conc_refine_max_traversals(0),
-  _conc_refine_current_delta(G1ConcRefineInitialDelta),
-
   // G1PausesBtwnConcMark defaults to -1
   // so the hack is to do the cast  QQQ FIXME
   _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
@@ -1634,9 +1629,8 @@
         print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
         print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false);
         print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
-        if (G1RSBarrierUseQueue)
-          print_par_buffers(3, "Processed Buffers",
-                            _par_last_update_rs_processed_buffers, true);
+        print_par_buffers(3, "Processed Buffers",
+                          _par_last_update_rs_processed_buffers, true);
         print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
         print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
         print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms);
@@ -1649,9 +1643,8 @@
         print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
       } else {
         print_stats(1, "Update RS", update_rs_time);
-        if (G1RSBarrierUseQueue)
-          print_stats(2, "Processed Buffers",
-                      (int)update_rs_processed_buffers);
+        print_stats(2, "Processed Buffers",
+                    (int)update_rs_processed_buffers);
         print_stats(1, "Ext Root Scanning", ext_root_scan_time);
         print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
         print_stats(1, "Scan-Only Scanning", scan_only_time);
@@ -2467,18 +2460,6 @@
                (double) _region_num_young / (double) all_region_num * 100.0,
                _region_num_tenured,
                (double) _region_num_tenured / (double) all_region_num * 100.0);
-
-    if (!G1RSBarrierUseQueue) {
-      gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) "
-                    "did zero traversals.",
-                    _conc_refine_enabled, _conc_refine_zero_traversals,
-                    _conc_refine_enabled > 0 ?
-                    100.0 * (float)_conc_refine_zero_traversals/
-                    (float)_conc_refine_enabled : 0.0);
-      gclog_or_tty->print_cr("  Max # of traversals = %d.",
-                    _conc_refine_max_traversals);
-      gclog_or_tty->print_cr("");
-    }
   }
   if (TraceGen1Time) {
     if (_all_full_gc_times_ms->num() > 0) {
@@ -2500,38 +2481,6 @@
 #endif // PRODUCT
 }
 
-void G1CollectorPolicy::update_conc_refine_data() {
-  unsigned traversals = _g1->concurrent_g1_refine()->disable();
-  if (traversals == 0) _conc_refine_zero_traversals++;
-  _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals,
-                                     (size_t)traversals);
-
-  if (G1PolicyVerbose > 1)
-    gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals);
-  double multiplier = 1.0;
-  if (traversals == 0) {
-    multiplier = 4.0;
-  } else if (traversals > (size_t)G1ConcRefineTargTraversals) {
-    multiplier = 1.0/1.5;
-  } else if (traversals < (size_t)G1ConcRefineTargTraversals) {
-    multiplier = 1.5;
-  }
-  if (G1PolicyVerbose > 1) {
-    gclog_or_tty->print_cr("  Multiplier = %7.2f.", multiplier);
-    gclog_or_tty->print("  Delta went from %d regions to ",
-               _conc_refine_current_delta);
-  }
-  _conc_refine_current_delta =
-    MIN2(_g1->n_regions(),
-         (size_t)(_conc_refine_current_delta * multiplier));
-  _conc_refine_current_delta =
-    MAX2(_conc_refine_current_delta, (size_t)1);
-  if (G1PolicyVerbose > 1) {
-    gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta);
-  }
-  _conc_refine_enabled++;
-}
-
 bool
 G1CollectorPolicy::should_add_next_region_to_young_list() {
   assert(in_young_gc_mode(), "should be in young GC mode");
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -637,18 +637,6 @@
   // The number of collection pauses at the end of the last mark.
   size_t _n_pauses_at_mark_end;
 
-  // ==== This section is for stats related to starting Conc Refinement on time.
-  size_t _conc_refine_enabled;
-  size_t _conc_refine_zero_traversals;
-  size_t _conc_refine_max_traversals;
-  // In # of heap regions.
-  size_t _conc_refine_current_delta;
-
-  // At the beginning of a collection pause, update the variables above,
-  // especially the "delta".
-  void update_conc_refine_data();
-  // ====
-
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1;
 
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -105,28 +105,6 @@
   _g1->heap_region_iterate(&rc);
 }
 
-class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
-  G1CollectedHeap*    _g1h;
-  ModRefBarrierSet*   _mr_bs;
-  UpdateRSOopClosure  _cl;
-  int _worker_i;
-public:
-  UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
-    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
-    _mr_bs(g1->mr_bs()),
-    _worker_i(worker_i),
-    _g1h(g1)
-    {}
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->in_collection_set() && !r->continuesHumongous()) {
-      _cl.set_from(r);
-      r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind);
-      _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true);
-    }
-    return false;
-  }
-};
-
 class VerifyRSCleanCardOopClosure: public OopClosure {
   G1CollectedHeap* _g1;
 public:
@@ -241,6 +219,7 @@
     HeapRegionRemSet* hrrs = r->rem_set();
     if (hrrs->iter_is_complete()) return false; // All done.
     if (!_try_claimed && !hrrs->claim_iter()) return false;
+    _g1h->push_dirty_cards_region(r);
     // If we didn't return above, then
     //   _try_claimed || r->claim_iter()
     // is true: either we're supposed to work on claimed-but-not-complete
@@ -264,6 +243,10 @@
       assert(card_region != NULL, "Yielding cards not in the heap?");
       _cards++;
 
+      if (!card_region->is_on_dirty_cards_region_list()) {
+        _g1h->push_dirty_cards_region(card_region);
+      }
+
        // If the card is dirty, then we will scan it during updateRS.
       if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
           if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
@@ -350,30 +333,17 @@
   double start = os::elapsedTime();
   _g1p->record_update_rs_start_time(worker_i, start * 1000.0);
 
-  if (G1RSBarrierUseQueue && !cg1r->do_traversal()) {
-    // Apply the appropriate closure to all remaining log entries.
-    _g1->iterate_dirty_card_closure(false, worker_i);
-    // Now there should be no dirty cards.
-    if (G1RSLogCheckCardTable) {
-      CountNonCleanMemRegionClosure cl(_g1);
-      _ct_bs->mod_card_iterate(&cl);
-      // XXX This isn't true any more: keeping cards of young regions
-      // marked dirty broke it.  Need some reasonable fix.
-      guarantee(cl.n() == 0, "Card table should be clean.");
-    }
-  } else {
-    UpdateRSOutOfRegionClosure update_rs(_g1, worker_i);
-    _g1->heap_region_iterate(&update_rs);
-    // We did a traversal; no further one is necessary.
-    if (G1RSBarrierUseQueue) {
-      assert(cg1r->do_traversal(), "Or we shouldn't have gotten here.");
-      cg1r->set_pya_cancel();
-    }
-    if (_cg1r->use_cache()) {
-      _cg1r->clear_and_record_card_counts();
-      _cg1r->clear_hot_cache();
-    }
+  // Apply the appropriate closure to all remaining log entries.
+  _g1->iterate_dirty_card_closure(false, worker_i);
+  // Now there should be no dirty cards.
+  if (G1RSLogCheckCardTable) {
+    CountNonCleanMemRegionClosure cl(_g1);
+    _ct_bs->mod_card_iterate(&cl);
+    // XXX This isn't true any more: keeping cards of young regions
+    // marked dirty broke it.  Need some reasonable fix.
+    guarantee(cl.n() == 0, "Card table should be clean.");
   }
+
   _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
 }
 
@@ -486,11 +456,6 @@
                                   * 1000.0);
 }
 
-void HRInto_G1RemSet::set_par_traversal(bool b) {
-  _par_traversal_in_progress = b;
-  HeapRegionRemSet::set_par_traversal(b);
-}
-
 void HRInto_G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }
@@ -527,7 +492,7 @@
       updateRS(worker_i);
       scanNewRefsRS(oc, worker_i);
     } else {
-      _g1p->record_update_rs_start_time(worker_i, os::elapsedTime());
+      _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
       _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
       _g1p->record_update_rs_time(worker_i, 0.0);
       _g1p->record_scan_new_refs_time(worker_i, 0.0);
@@ -535,7 +500,7 @@
     if (G1ParallelRSetScanningEnabled || (worker_i == 0)) {
       scanRS(oc, worker_i);
     } else {
-      _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime());
+      _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
       _g1p->record_scan_rs_time(worker_i, 0.0);
     }
   } else {
@@ -562,11 +527,6 @@
   if (ParallelGCThreads > 0) {
     set_par_traversal(true);
     _seq_task->set_par_threads((int)n_workers());
-    if (cg1r->do_traversal()) {
-      updateRS(0);
-      // Have to do this again after updaters
-      cleanupHRRS();
-    }
   }
   guarantee( _cards_scanned == NULL, "invariant" );
   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
@@ -647,11 +607,8 @@
   _g1->collection_set_iterate(&iterClosure);
   // Set all cards back to clean.
   _g1->cleanUpCardTable();
+
   if (ParallelGCThreads > 0) {
-    ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
-    if (cg1r->do_traversal()) {
-      cg1r->cg1rThread()->set_do_traversal(false);
-    }
     set_par_traversal(false);
   }
 
@@ -721,139 +678,8 @@
 }
 
 
-class ConcRefineRegionClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  CardTableModRefBS* _ctbs;
-  ConcurrentGCThread* _cgc_thrd;
-  ConcurrentG1Refine* _cg1r;
-  unsigned _cards_processed;
-  UpdateRSOopClosure _update_rs_oop_cl;
-public:
-  ConcRefineRegionClosure(CardTableModRefBS* ctbs,
-                          ConcurrentG1Refine* cg1r,
-                          HRInto_G1RemSet* g1rs) :
-    _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()),
-    _update_rs_oop_cl(g1rs), _cards_processed(0),
-    _g1h(G1CollectedHeap::heap())
-  {}
-
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->in_collection_set() &&
-        !r->continuesHumongous() &&
-        !r->is_young()) {
-      _update_rs_oop_cl.set_from(r);
-      UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
-
-      // For each run of dirty card in the region:
-      //   1) Clear the cards.
-      //   2) Process the range corresponding to the run, adding any
-      //      necessary RS entries.
-      // 1 must precede 2, so that a concurrent modification redirties the
-      // card.  If a processing attempt does not succeed, because it runs
-      // into an unparseable region, we will do binary search to find the
-      // beginning of the next parseable region.
-      HeapWord* startAddr = r->bottom();
-      HeapWord* endAddr = r->used_region().end();
-      HeapWord* lastAddr;
-      HeapWord* nextAddr;
-
-      for (nextAddr = lastAddr = startAddr;
-           nextAddr < endAddr;
-           nextAddr = lastAddr) {
-        MemRegion dirtyRegion;
-
-        // Get and clear dirty region from card table
-        MemRegion next_mr(nextAddr, endAddr);
-        dirtyRegion =
-          _ctbs->dirty_card_range_after_reset(
-                           next_mr,
-                           true, CardTableModRefBS::clean_card_val());
-        assert(dirtyRegion.start() >= nextAddr,
-               "returned region inconsistent?");
-
-        if (!dirtyRegion.is_empty()) {
-          HeapWord* stop_point =
-            r->object_iterate_mem_careful(dirtyRegion,
-                                          &update_rs_obj_cl);
-          if (stop_point == NULL) {
-            lastAddr = dirtyRegion.end();
-            _cards_processed +=
-              (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words);
-          } else {
-            // We're going to skip one or more cards that we can't parse.
-            HeapWord* next_parseable_card =
-              r->next_block_start_careful(stop_point);
-            // Round this up to a card boundary.
-            next_parseable_card =
-              _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card));
-            // Now we invalidate the intervening cards so we'll see them
-            // again.
-            MemRegion remaining_dirty =
-              MemRegion(stop_point, dirtyRegion.end());
-            MemRegion skipped =
-              MemRegion(stop_point, next_parseable_card);
-            _ctbs->invalidate(skipped.intersection(remaining_dirty));
-
-            // Now start up again where we can parse.
-            lastAddr = next_parseable_card;
-
-            // Count how many we did completely.
-            _cards_processed +=
-              (stop_point - dirtyRegion.start()) /
-              CardTableModRefBS::card_size_in_words;
-          }
-          // Allow interruption at regular intervals.
-          // (Might need to make them more regular, if we get big
-          // dirty regions.)
-          if (_cgc_thrd != NULL) {
-            if (_cgc_thrd->should_yield()) {
-              _cgc_thrd->yield();
-              switch (_cg1r->get_pya()) {
-              case PYA_continue:
-                // This may have changed: re-read.
-                endAddr = r->used_region().end();
-                continue;
-              case PYA_restart: case PYA_cancel:
-                return true;
-              }
-            }
-          }
-        } else {
-          break;
-        }
-      }
-    }
-    // A good yield opportunity.
-    if (_cgc_thrd != NULL) {
-      if (_cgc_thrd->should_yield()) {
-        _cgc_thrd->yield();
-        switch (_cg1r->get_pya()) {
-        case PYA_restart: case PYA_cancel:
-          return true;
-        default:
-          break;
-        }
-
-      }
-    }
-    return false;
-  }
-
-  unsigned cards_processed() { return _cards_processed; }
-};
-
-
-void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) {
-  ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this);
-  _g1->heap_region_iterate(&cr_cl);
-  _conc_refine_traversals++;
-  _conc_refine_cards += cr_cl.cards_processed();
-}
-
 static IntHistogram out_of_histo(50, 50);
 
-
-
 void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
   // If the card is no longer dirty, nothing to do.
   if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
@@ -983,10 +809,16 @@
   HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
 };
 
+class PrintRSThreadVTimeClosure : public ThreadClosure {
+public:
+  virtual void do_thread(Thread *t) {
+    ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
+    gclog_or_tty->print("    %5.2f", crt->vtime_accum());
+  }
+};
+
 void HRInto_G1RemSet::print_summary_info() {
   G1CollectedHeap* g1 = G1CollectedHeap::heap();
-  ConcurrentG1RefineThread* cg1r_thrd =
-    g1->concurrent_g1_refine()->cg1rThread();
 
 #if CARD_REPEAT_HISTO
   gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
@@ -999,15 +831,13 @@
     gclog_or_tty->print_cr("  # of CS ptrs --> # of cards with that number.");
     out_of_histo.print_on(gclog_or_tty);
   }
-  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in "
-                "%5.2fs.",
-                _conc_refine_cards, cg1r_thrd->vtime_accum());
-
+  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
+                         _conc_refine_cards);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   jint tot_processed_buffers =
     dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
   gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
-  gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS thread.",
+  gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS threads.",
                 dcqs.processed_buffers_rs_thread(),
                 100.0*(float)dcqs.processed_buffers_rs_thread()/
                 (float)tot_processed_buffers);
@@ -1015,15 +845,12 @@
                 dcqs.processed_buffers_mut(),
                 100.0*(float)dcqs.processed_buffers_mut()/
                 (float)tot_processed_buffers);
-  gclog_or_tty->print_cr("   Did %d concurrent refinement traversals.",
-                _conc_refine_traversals);
-  if (!G1RSBarrierUseQueue) {
-    gclog_or_tty->print_cr("   Scanned %8.2f cards/traversal.",
-                  _conc_refine_traversals > 0 ?
-                  (float)_conc_refine_cards/(float)_conc_refine_traversals :
-                  0);
-  }
+  gclog_or_tty->print_cr("  Conc RS threads times(s)");
+  PrintRSThreadVTimeClosure p;
+  gclog_or_tty->print("     ");
+  g1->concurrent_g1_refine()->threads_do(&p);
   gclog_or_tty->print_cr("");
+
   if (G1UseHRIntoRS) {
     HRRSStatsIter blk;
     g1->heap_region_iterate(&blk);
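The deletions above retire the region-sweep refinement model (ConcRefineRegionClosure and concurrentRefinementPass) in favor of the queue-based one: mutators log dirty cards into dirty card queues, and the refinement threads drain completed buffers a card at a time through concurrentRefineOneCard. A hedged sketch of such a drain loop; next_dirty_card is a stand-in for the buffer-walking code, which lives elsewhere:

    void refine_worker_loop(HRInto_G1RemSet* rs, int worker_i) {
      jbyte* card_ptr;
      // Hypothetical helper: yields cards from completed dirty-card buffers.
      while (next_dirty_card(&card_ptr)) {
        rs->concurrentRefineOneCard(card_ptr, worker_i);
      }
    }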
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -33,15 +33,12 @@
 class G1RemSet: public CHeapObj {
 protected:
   G1CollectedHeap* _g1;
-
-  unsigned _conc_refine_traversals;
   unsigned _conc_refine_cards;
-
   size_t n_workers();
 
 public:
   G1RemSet(G1CollectedHeap* g1) :
-    _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
+    _g1(g1), _conc_refine_cards(0)
   {}
 
   // Invoke "blk->do_oop" on all pointers into the CS in object in regions
@@ -81,19 +78,11 @@
   virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
                          int worker_num, int claim_val) = 0;
 
-  // Do any "refinement" activity that might be appropriate to the given
-  // G1RemSet.  If "refinement" has iterateive "passes", do one pass.
-  // If "t" is non-NULL, it is the thread performing the refinement.
-  // Default implementation does nothing.
-  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
-
   // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
   // join and leave around parts that must be atomic wrt GC.  (NULL means
   // being done at a safepoint.)
   virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
 
-  unsigned conc_refine_cards() { return _conc_refine_cards; }
-
   // Print any relevant summary info.
   virtual void print_summary_info() {}
 
@@ -153,7 +142,7 @@
   // progress.  If so, then cards added to remembered sets should also have
   // their references into the collection summarized in "_new_refs".
   bool _par_traversal_in_progress;
-  void set_par_traversal(bool b);
+  void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
   GrowableArray<oop*>** _new_refs;
   void new_refs_iterate(OopClosure* cl);
 
@@ -194,7 +183,6 @@
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
                  int worker_num, int claim_val);
 
-  virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
   virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
 
   virtual void print_summary_info();
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -147,9 +147,6 @@
   develop(bool, G1PrintCTFilterStats, false,                                \
           "If true, print stats on RS filtering effectiveness")             \
                                                                             \
-  develop(bool, G1RSBarrierUseQueue, true,                                  \
-          "If true, use queueing RS barrier")                               \
-                                                                            \
   develop(bool, G1DeferredRSUpdate, true,                                   \
           "If true, use deferred RS updates")                               \
                                                                             \
@@ -253,6 +250,10 @@
                                                                             \
   experimental(bool, G1ParallelRSetScanningEnabled, false,                  \
           "Enables the parallelization of remembered set scanning "         \
-          "during evacuation pauses")
+          "during evacuation pauses")                                       \
+                                                                            \
+  product(uintx, G1ParallelRSetThreads, 0,                                  \
+          "If non-0 is the number of parallel rem set update threads, "     \
+          "otherwise the value is determined ergonomically.")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
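G1ParallelRSetThreads follows the usual HotSpot convention that zero means "choose ergonomically". A sketch of how a consumer of the flag might select the thread count; the fallback shown is an assumption, not code from this change:

    int rset_update_threads() {
      if (G1ParallelRSetThreads > 0) {
        return (int)G1ParallelRSetThreads;  // explicit user setting
      }
      return (int)ParallelGCThreads;        // assumed ergonomic default
    }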
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -40,15 +40,19 @@
 {}
 
 class VerifyLiveClosure: public OopClosure {
+private:
   G1CollectedHeap* _g1h;
   CardTableModRefBS* _bs;
   oop _containing_obj;
   bool _failures;
   int _n_failures;
+  bool _use_prev_marking;
 public:
-  VerifyLiveClosure(G1CollectedHeap* g1h) :
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  VerifyLiveClosure(G1CollectedHeap* g1h, bool use_prev_marking) :
     _g1h(g1h), _bs(NULL), _containing_obj(NULL),
-    _failures(false), _n_failures(0)
+    _failures(false), _n_failures(0), _use_prev_marking(use_prev_marking)
   {
     BarrierSet* bs = _g1h->barrier_set();
     if (bs->is_a(BarrierSet::CardTableModRef))
@@ -68,11 +72,13 @@
 
   void do_oop(oop* p) {
     assert(_containing_obj != NULL, "Precondition");
-    assert(!_g1h->is_obj_dead(_containing_obj), "Precondition");
+    assert(!_g1h->is_obj_dead_cond(_containing_obj, _use_prev_marking),
+           "Precondition");
     oop obj = *p;
     if (obj != NULL) {
       bool failed = false;
-      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) {
+      if (!_g1h->is_in_closed_subset(obj) ||
+          _g1h->is_obj_dead_cond(obj, _use_prev_marking)) {
         if (!_failures) {
           gclog_or_tty->print_cr("");
           gclog_or_tty->print_cr("----------");
@@ -351,6 +357,7 @@
     _claimed(InitialClaimValue), _evacuation_failed(false),
     _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1),
     _young_type(NotYoung), _next_young_region(NULL),
+    _next_dirty_cards_region(NULL),
     _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1),
     _rem_set(NULL), _zfs(NotZeroFilled)
 {
@@ -646,19 +653,23 @@
   G1OffsetTableContigSpace::print_on(st);
 }
 
+void HeapRegion::verify(bool allow_dirty) const {
+  verify(allow_dirty, /* use_prev_marking */ true);
+}
+
 #define OBJ_SAMPLE_INTERVAL 0
 #define BLOCK_SAMPLE_INTERVAL 100
 
 // This really ought to be commoned up into OffsetTableContigSpace somehow.
 // We would need a mechanism to make that code skip dead objects.
 
-void HeapRegion::verify(bool allow_dirty) const {
+void HeapRegion::verify(bool allow_dirty, bool use_prev_marking) const {
   G1CollectedHeap* g1 = G1CollectedHeap::heap();
   HeapWord* p = bottom();
   HeapWord* prev_p = NULL;
   int objs = 0;
   int blocks = 0;
-  VerifyLiveClosure vl_cl(g1);
+  VerifyLiveClosure vl_cl(g1, use_prev_marking);
   while (p < top()) {
     size_t size = oop(p)->size();
     if (blocks == BLOCK_SAMPLE_INTERVAL) {
@@ -670,7 +681,7 @@
     }
     if (objs == OBJ_SAMPLE_INTERVAL) {
       oop obj = oop(p);
-      if (!g1->is_obj_dead(obj, this)) {
+      if (!g1->is_obj_dead_cond(obj, this, use_prev_marking)) {
         obj->verify();
         vl_cl.set_containing_obj(obj);
         obj->oop_iterate(&vl_cl);
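The verification path now consults is_obj_dead_cond, the conditional variant of is_obj_dead parameterized by use_prev_marking. Presumably it dispatches between the two marking queries roughly as below; the is_obj_ill name for the "next" marking query is an assumption based on G1's naming, not part of this hunk:

    bool G1CollectedHeap::is_obj_dead_cond(const oop obj,
                                           const HeapRegion* hr,
                                           const bool use_prev_marking) const {
      return use_prev_marking ? is_obj_dead(obj, hr)   // "prev" marking info
                              : is_obj_ill(obj, hr);   // "next" marking info
    }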
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -227,6 +227,9 @@
   // next region in the young "generation" region set
   HeapRegion* _next_young_region;
 
+  // Next region whose cards need cleaning
+  HeapRegion* _next_dirty_cards_region;
+
   // For parallel heapRegion traversal.
   jint _claimed;
 
@@ -468,6 +471,11 @@
     _next_young_region = hr;
   }
 
+  HeapRegion* get_next_dirty_cards_region() const { return _next_dirty_cards_region; }
+  HeapRegion** next_dirty_cards_region_addr() { return &_next_dirty_cards_region; }
+  void set_next_dirty_cards_region(HeapRegion* hr) { _next_dirty_cards_region = hr; }
+  bool is_on_dirty_cards_region_list() const { return get_next_dirty_cards_region() != NULL; }
+
   // Allows logical separation between objects allocated before and after.
   void save_marks();
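The new _next_dirty_cards_region field turns each region into a node of an intrusive singly linked list of regions whose cards need cleaning. Since a NULL link means "not on the list" (see is_on_dirty_cards_region_list), the last region must link to itself rather than to NULL. A lock-free push might look like the sketch below; the list head and the function itself are assumptions, not part of this hunk:

    HeapRegion* volatile _dirty_cards_region_list;  // assumed head, NULL when empty

    void push_dirty_cards_region(HeapRegion* hr) {
      assert(!hr->is_on_dirty_cards_region_list(), "push a region at most once");
      HeapRegion* head;
      do {
        head = _dirty_cards_region_list;
        // Self-link terminates the list, since NULL means "not on the list".
        hr->set_next_dirty_cards_region(head != NULL ? head : hr);
      } while ((HeapRegion*)Atomic::cmpxchg_ptr(hr, &_dirty_cards_region_list,
                                                head) != head);
    }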
 
@@ -774,7 +782,16 @@
   void print() const;
   void print_on(outputStream* st) const;
 
-  // Override
+  // use_prev_marking == true  -> use "prev" marking information,
+  // use_prev_marking == false -> use "next" marking information
+  // NOTE: Only the "prev" marking information is guaranteed to be
+  // consistent most of the time, so most calls to this should use
+  // use_prev_marking == true. Currently, there is only one case where
+  // this is called with use_prev_marking == false, which is to verify
+  // the "next" marking information at the end of remark.
+  void verify(bool allow_dirty, bool use_prev_marking) const;
+
+  // Override; it uses the "prev" marking information
   virtual void verify(bool allow_dirty) const;
 
 #ifdef DEBUG
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -109,7 +109,7 @@
     return new PerRegionTable(hr);
   }
 
-  void add_card_work(short from_card, bool par) {
+  void add_card_work(CardIdx_t from_card, bool par) {
     if (!_bm.at(from_card)) {
       if (par) {
         if (_bm.par_at_put(from_card, 1)) {
@@ -141,11 +141,11 @@
     // and adding a bit to the new table is never incorrect.
     if (loc_hr->is_in_reserved(from)) {
       size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom());
-      size_t from_card =
-        hw_offset >>
-        (CardTableModRefBS::card_shift - LogHeapWordSize);
+      CardIdx_t from_card = (CardIdx_t)
+          hw_offset >> (CardTableModRefBS::card_shift - LogHeapWordSize);
 
-      add_card_work((short) from_card, par);
+      assert(0 <= from_card && from_card < CardsPerRegion, "Must be in range.");
+      add_card_work(from_card, par);
     }
   }
 
@@ -190,11 +190,11 @@
 #endif
   }
 
-  void add_card(short from_card_index) {
+  void add_card(CardIdx_t from_card_index) {
     add_card_work(from_card_index, /*parallel*/ true);
   }
 
-  void seq_add_card(short from_card_index) {
+  void seq_add_card(CardIdx_t from_card_index) {
     add_card_work(from_card_index, /*parallel*/ false);
   }
 
@@ -604,7 +604,7 @@
 
   // Note that this may be a continued H region.
   HeapRegion* from_hr = _g1h->heap_region_containing_raw(from);
-  size_t from_hrs_ind = (size_t)from_hr->hrs_index();
+  RegionIdx_t from_hrs_ind = (RegionIdx_t) from_hr->hrs_index();
 
   // If the region is already coarsened, return.
   if (_coarse_map.at(from_hrs_ind)) {
@@ -627,11 +627,11 @@
       uintptr_t from_hr_bot_card_index =
         uintptr_t(from_hr->bottom())
           >> CardTableModRefBS::card_shift;
-      int card_index = from_card - from_hr_bot_card_index;
+      CardIdx_t card_index = from_card - from_hr_bot_card_index;
       assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion,
              "Must be in range.");
       if (G1HRRSUseSparseTable &&
-          _sparse_table.add_card((short) from_hrs_ind, card_index)) {
+          _sparse_table.add_card(from_hrs_ind, card_index)) {
         if (G1RecordHRRSOops) {
           HeapRegionRemSet::record(hr(), from);
 #if HRRS_VERBOSE
@@ -656,9 +656,9 @@
       }
 
       // Otherwise, transfer from sparse to fine-grain.
-      short cards[SparsePRTEntry::CardsPerEntry];
+      CardIdx_t cards[SparsePRTEntry::CardsPerEntry];
       if (G1HRRSUseSparseTable) {
-        bool res = _sparse_table.get_cards((short) from_hrs_ind, &cards[0]);
+        bool res = _sparse_table.get_cards(from_hrs_ind, &cards[0]);
         assert(res, "There should have been an entry");
       }
 
@@ -679,13 +679,13 @@
       // Add in the cards from the sparse table.
       if (G1HRRSUseSparseTable) {
         for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) {
-          short c = cards[i];
+          CardIdx_t c = cards[i];
           if (c != SparsePRTEntry::NullEntry) {
             prt->add_card(c);
           }
         }
         // Now we can delete the sparse entry.
-        bool res = _sparse_table.delete_entry((short) from_hrs_ind);
+        bool res = _sparse_table.delete_entry(from_hrs_ind);
         assert(res, "It should have been there.");
       }
     }
@@ -1030,7 +1030,7 @@
 bool OtherRegionsTable::contains_reference_locked(oop* from) const {
   HeapRegion* hr = _g1h->heap_region_containing_raw(from);
   if (hr == NULL) return false;
-  size_t hr_ind = hr->hrs_index();
+  RegionIdx_t hr_ind = (RegionIdx_t) hr->hrs_index();
   // Is this region in the coarse map?
   if (_coarse_map.at(hr_ind)) return true;
 
@@ -1045,25 +1045,19 @@
     uintptr_t hr_bot_card_index =
       uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift;
     assert(from_card >= hr_bot_card_index, "Inv");
-    int card_index = from_card - hr_bot_card_index;
-    return _sparse_table.contains_card((short)hr_ind, card_index);
+    CardIdx_t card_index = from_card - hr_bot_card_index;
+    assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion, "Must be in range.");
+    return _sparse_table.contains_card(hr_ind, card_index);
   }
 
 
 }
 
-
-bool HeapRegionRemSet::_par_traversal = false;
-
-void HeapRegionRemSet::set_par_traversal(bool b) {
-  assert(_par_traversal != b, "Proper alternation...");
-  _par_traversal = b;
-}
-
+// Determines how many threads can add records to an rset in parallel.
+// This can be done by either mutator threads together with the
+// concurrent refinement threads or GC threads.
 int HeapRegionRemSet::num_par_rem_sets() {
-  // We always have at least two, so that a mutator thread can claim an
-  // id and add to a rem set.
-  return (int) MAX2(ParallelGCThreads, (size_t)2);
+  return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
 }
 
 HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
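The MAX2 works because the two writer populations never overlap in time: between pauses the rem sets are written by mutator threads (claiming DirtyCardQueueSet par ids) plus the concurrent refinement threads, while inside a pause they are written by the parallel GC workers. The table therefore needs slots for the larger population, not the sum. Illustrative only, with made-up numbers:

    // Assume num_par_ids() == 4, thread_num() == 2, ParallelGCThreads == 8:
    //   concurrent-phase writers: 4 + 2 == 6
    //   pause-time writers:       8
    //   num_par_rem_sets()     == MAX2(6, 8) == 8   (not 6 + 8 == 14)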
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -177,8 +177,6 @@
   G1BlockOffsetSharedArray* _bosa;
   G1BlockOffsetSharedArray* bosa() const { return _bosa; }
 
-  static bool _par_traversal;
-
   OtherRegionsTable _other_regions;
 
   // One set bit for every region that has an entry for this one.
@@ -211,8 +209,6 @@
                    HeapRegion* hr);
 
   static int num_par_rem_sets();
-  static bool par_traversal() { return _par_traversal; }
-  static void set_par_traversal(bool b);
 
   HeapRegion* hr() const {
     return _other_regions.hr();
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -172,7 +172,7 @@
   _n_completed_buffers++;
 
   if (!_process_completed &&
-      _n_completed_buffers == _process_completed_threshold) {
+      _n_completed_buffers >= _process_completed_threshold) {
     _process_completed = true;
     if (_notify_when_complete)
       _cbl_mon->notify_all();
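Switching the notification test from == to >= guards against the counter stepping past the threshold without ever equaling it, for example if _process_completed_threshold is lowered at runtime while buffers are already queued. A minimal illustration with invented values:

    int n_completed = 3;
    int threshold   = 5;
    threshold = 2;      // lowered while n_completed is already past it
    n_completed++;      // 4: "n_completed == threshold" never fires here,
                        // but "n_completed >= threshold" notifies as intended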
--- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -33,7 +33,7 @@
     sprt_iter->init(this);
 }
 
-void SparsePRTEntry::init(short region_ind) {
+void SparsePRTEntry::init(RegionIdx_t region_ind) {
   _region_ind = region_ind;
   _next_index = NullEntry;
 #if UNROLL_CARD_LOOPS
@@ -43,11 +43,12 @@
   _cards[2] = NullEntry;
   _cards[3] = NullEntry;
 #else
-  for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry;
+  for (int i = 0; i < CardsPerEntry; i++)
+    _cards[i] = NullEntry;
 #endif
 }
 
-bool SparsePRTEntry::contains_card(short card_index) const {
+bool SparsePRTEntry::contains_card(CardIdx_t card_index) const {
 #if UNROLL_CARD_LOOPS
   assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
   if (_cards[0] == card_index) return true;
@@ -80,10 +81,10 @@
   return sum;
 }
 
-SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) {
+SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(CardIdx_t card_index) {
 #if UNROLL_CARD_LOOPS
   assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
-  short c = _cards[0];
+  CardIdx_t c = _cards[0];
   if (c == card_index) return found;
   if (c == NullEntry) { _cards[0] = card_index; return added; }
   c = _cards[1];
@@ -97,16 +98,19 @@
   if (c == NullEntry) { _cards[3] = card_index; return added; }
 #else
   for (int i = 0; i < CardsPerEntry; i++) {
-    short c = _cards[i];
+    CardIdx_t c = _cards[i];
     if (c == card_index) return found;
-    if (c == NullEntry) { _cards[i] = card_index; return added; }
+    if (c == NullEntry) {
+      _cards[i] = card_index;
+      return added;
+    }
   }
 #endif
   // Otherwise, we're full.
   return overflow;
 }
 
-void SparsePRTEntry::copy_cards(short* cards) const {
+void SparsePRTEntry::copy_cards(CardIdx_t* cards) const {
 #if UNROLL_CARD_LOOPS
   assert(CardsPerEntry == 4, "Assumption.  If changes, un-unroll.");
   cards[0] = _cards[0];
@@ -130,7 +134,7 @@
   _capacity(capacity), _capacity_mask(capacity-1),
   _occupied_entries(0), _occupied_cards(0),
   _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)),
-  _buckets(NEW_C_HEAP_ARRAY(short, capacity)),
+  _buckets(NEW_C_HEAP_ARRAY(int, capacity)),
   _next_deleted(NULL), _deleted(false),
   _free_list(NullEntry), _free_region(0)
 {
@@ -143,7 +147,7 @@
     _entries = NULL;
   }
   if (_buckets != NULL) {
-    FREE_C_HEAP_ARRAY(short, _buckets);
+    FREE_C_HEAP_ARRAY(int, _buckets);
     _buckets = NULL;
   }
 }
@@ -153,14 +157,18 @@
   _occupied_cards = 0;
   guarantee(_entries != NULL, "INV");
   guarantee(_buckets != NULL, "INV");
+
+  guarantee(_capacity <= ((size_t)1 << (sizeof(int)*BitsPerByte-1)) - 1,
+                "_capacity too large");
+
   // This will put -1 == NullEntry in the key field of all entries.
   memset(_entries, -1, _capacity * sizeof(SparsePRTEntry));
-  memset(_buckets, -1, _capacity * sizeof(short));
+  memset(_buckets, -1, _capacity * sizeof(int));
   _free_list = NullEntry;
   _free_region = 0;
 }
 
-bool RSHashTable::add_card(short region_ind, short card_index) {
+bool RSHashTable::add_card(RegionIdx_t region_ind, CardIdx_t card_index) {
   SparsePRTEntry* e = entry_for_region_ind_create(region_ind);
   assert(e != NULL && e->r_ind() == region_ind,
          "Postcondition of call above.");
@@ -175,9 +183,9 @@
   return res != SparsePRTEntry::overflow;
 }
 
-bool RSHashTable::get_cards(short region_ind, short* cards) {
-  short ind = (short) (region_ind & capacity_mask());
-  short cur_ind = _buckets[ind];
+bool RSHashTable::get_cards(RegionIdx_t region_ind, CardIdx_t* cards) {
+  int ind = (int) (region_ind & capacity_mask());
+  int cur_ind = _buckets[ind];
   SparsePRTEntry* cur;
   while (cur_ind != NullEntry &&
          (cur = entry(cur_ind))->r_ind() != region_ind) {
@@ -192,10 +200,10 @@
   return true;
 }
 
-bool RSHashTable::delete_entry(short region_ind) {
-  short ind = (short) (region_ind & capacity_mask());
-  short* prev_loc = &_buckets[ind];
-  short cur_ind = *prev_loc;
+bool RSHashTable::delete_entry(RegionIdx_t region_ind) {
+  int ind = (int) (region_ind & capacity_mask());
+  int* prev_loc = &_buckets[ind];
+  int cur_ind = *prev_loc;
   SparsePRTEntry* cur;
   while (cur_ind != NullEntry &&
          (cur = entry(cur_ind))->r_ind() != region_ind) {
@@ -212,10 +220,11 @@
   return true;
 }
 
-SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const {
+SparsePRTEntry*
+RSHashTable::entry_for_region_ind(RegionIdx_t region_ind) const {
   assert(occupied_entries() < capacity(), "Precondition");
-  short ind = (short) (region_ind & capacity_mask());
-  short cur_ind = _buckets[ind];
+  int ind = (int) (region_ind & capacity_mask());
+  int cur_ind = _buckets[ind];
   SparsePRTEntry* cur;
   // XXX
   // int k = 0;
@@ -242,15 +251,16 @@
   }
 }
 
-SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) {
+SparsePRTEntry*
+RSHashTable::entry_for_region_ind_create(RegionIdx_t region_ind) {
   SparsePRTEntry* res = entry_for_region_ind(region_ind);
   if (res == NULL) {
-    short new_ind = alloc_entry();
+    int new_ind = alloc_entry();
     assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room.");
     res = entry(new_ind);
     res->init(region_ind);
     // Insert at front.
-    short ind = (short) (region_ind & capacity_mask());
+    int ind = (int) (region_ind & capacity_mask());
     res->set_next_index(_buckets[ind]);
     _buckets[ind] = new_ind;
     _occupied_entries++;
@@ -258,8 +268,8 @@
   return res;
 }
 
-short RSHashTable::alloc_entry() {
-  short res;
+int RSHashTable::alloc_entry() {
+  int res;
   if (_free_list != NullEntry) {
     res = _free_list;
     _free_list = entry(res)->next_index();
@@ -273,13 +283,11 @@
   }
 }
 
-
-void RSHashTable::free_entry(short fi) {
+void RSHashTable::free_entry(int fi) {
   entry(fi)->set_next_index(_free_list);
   _free_list = fi;
 }
 
-
 void RSHashTable::add_entry(SparsePRTEntry* e) {
   assert(e->num_valid_cards() > 0, "Precondition.");
   SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind());
@@ -322,8 +330,8 @@
   return NULL;
 }
 
-short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() {
-  short res;
+CardIdx_t /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() {
+  CardIdx_t res;
   while (_bl_ind != RSHashTable::NullEntry) {
     res = _rsht->entry(_bl_ind)->card(0);
     if (res != SparsePRTEntry::NullEntry) {
@@ -336,7 +344,7 @@
   return SparsePRTEntry::NullEntry;
 }
 
-size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) {
+size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(CardIdx_t ci) {
   return
     _heap_bot_card_ind
     + (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion)
@@ -345,7 +353,7 @@
 
 bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) {
   _card_ind++;
-  short ci;
+  CardIdx_t ci;
   if (_card_ind < SparsePRTEntry::CardsPerEntry &&
       ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) !=
        SparsePRTEntry::NullEntry)) {
@@ -379,16 +387,16 @@
   return false;
 }
 
-bool RSHashTable::contains_card(short region_index, short card_index) const {
+bool RSHashTable::contains_card(RegionIdx_t region_index, CardIdx_t card_index) const {
   SparsePRTEntry* e = entry_for_region_ind(region_index);
   return (e != NULL && e->contains_card(card_index));
 }
 
 size_t RSHashTable::mem_size() const {
-  return sizeof(this) + capacity() * (sizeof(SparsePRTEntry) + sizeof(short));
+  return sizeof(this) +
+    capacity() * (sizeof(SparsePRTEntry) + sizeof(int));
 }
 
-
 // ----------------------------------------------------------------------
 
 SparsePRT* SparsePRT::_head_expanded_list = NULL;
@@ -408,6 +416,7 @@
   }
 }
 
+
 SparsePRT* SparsePRT::get_from_expanded_list() {
   SparsePRT* hd = _head_expanded_list;
   while (hd != NULL) {
@@ -452,6 +461,7 @@
   _next = _cur;
 }
 
+
 SparsePRT::~SparsePRT() {
   assert(_next != NULL && _cur != NULL, "Inv");
   if (_cur != _next) { delete _cur; }
@@ -465,7 +475,7 @@
   return sizeof(this) + _next->mem_size();
 }
 
-bool SparsePRT::add_card(short region_id, short card_index) {
+bool SparsePRT::add_card(RegionIdx_t region_id, CardIdx_t card_index) {
 #if SPARSE_PRT_VERBOSE
   gclog_or_tty->print_cr("  Adding card %d from region %d to region %d sparse.",
                 card_index, region_id, _hr->hrs_index());
@@ -476,11 +486,11 @@
   return _next->add_card(region_id, card_index);
 }
 
-bool SparsePRT::get_cards(short region_id, short* cards) {
+bool SparsePRT::get_cards(RegionIdx_t region_id, CardIdx_t* cards) {
   return _next->get_cards(region_id, cards);
 }
 
-bool SparsePRT::delete_entry(short region_id) {
+bool SparsePRT::delete_entry(RegionIdx_t region_id) {
   return _next->delete_entry(region_id);
 }
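The raw shorts used for sparse-table indices are replaced by two self-documenting integer types. Their definitions are not visible in these hunks; presumably they are ordinary typedefs along these lines:

    typedef int RegionIdx_t;   // needs to hold [0 .. max_regions())
    typedef int CardIdx_t;     // needs to hold [0 .. CardsPerRegion)

Widening from short to int also removes the risk of overflow as region counts, and with them card indices, grow.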
 
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -35,32 +35,32 @@
 
 class SparsePRTEntry: public CHeapObj {
 public:
+
   enum SomePublicConstants {
-    CardsPerEntry = (short)4,
-    NullEntry = (short)-1,
-    DeletedEntry = (short)-2
+    CardsPerEntry =  4,
+    NullEntry     = -1
   };
 
 private:
-  short _region_ind;
-  short _next_index;
-  short _cards[CardsPerEntry];
+  RegionIdx_t _region_ind;
+  int         _next_index;
+  CardIdx_t   _cards[CardsPerEntry];
 
 public:
 
   // Set the region_ind to the given value, and delete all cards.
-  inline void init(short region_ind);
+  inline void init(RegionIdx_t region_ind);
 
-  short r_ind() const { return _region_ind; }
+  RegionIdx_t r_ind() const { return _region_ind; }
   bool valid_entry() const { return r_ind() >= 0; }
-  void set_r_ind(short rind) { _region_ind = rind; }
+  void set_r_ind(RegionIdx_t rind) { _region_ind = rind; }
 
-  short next_index() const { return _next_index; }
-  short* next_index_addr() { return &_next_index; }
-  void set_next_index(short ni) { _next_index = ni; }
+  int next_index() const { return _next_index; }
+  int* next_index_addr() { return &_next_index; }
+  void set_next_index(int ni) { _next_index = ni; }
 
   // Returns "true" iff the entry contains the given card index.
-  inline bool contains_card(short card_index) const;
+  inline bool contains_card(CardIdx_t card_index) const;
 
   // Returns the number of non-NULL card entries.
   inline int num_valid_cards() const;
@@ -73,14 +73,14 @@
     found,
     added
   };
-  inline AddCardResult add_card(short card_index);
+  inline AddCardResult add_card(CardIdx_t card_index);
 
   // Copy the current entry's cards into "cards".
-  inline void copy_cards(short* cards) const;
+  inline void copy_cards(CardIdx_t* cards) const;
   // Copy the current entry's cards into the "_card" array of "e."
   inline void copy_cards(SparsePRTEntry* e) const;
 
-  inline short card(int i) const { return _cards[i]; }
+  inline CardIdx_t card(int i) const { return _cards[i]; }
 };
 
 
@@ -98,9 +98,9 @@
   size_t _occupied_cards;
 
   SparsePRTEntry* _entries;
-  short* _buckets;
-  short  _free_region;
-  short  _free_list;
+  int* _buckets;
+  int  _free_region;
+  int  _free_list;
 
   static RSHashTable* _head_deleted_list;
   RSHashTable* _next_deleted;
@@ -113,20 +113,20 @@
   // operations, and that the table be less than completely full.  If
   // an entry for "region_ind" is already in the table, finds it and
   // returns its address; otherwise returns "NULL."
-  SparsePRTEntry* entry_for_region_ind(short region_ind) const;
+  SparsePRTEntry* entry_for_region_ind(RegionIdx_t region_ind) const;
 
   // Requires that the caller hold a lock preventing parallel modifying
   // operations, and that the table be less than completely full.  If
   // an entry for "region_ind" is already in the table, finds it and
   // returns its address; otherwise allocates, initializes, inserts and
   // returns a new entry for "region_ind".
-  SparsePRTEntry* entry_for_region_ind_create(short region_ind);
+  SparsePRTEntry* entry_for_region_ind_create(RegionIdx_t region_ind);
 
   // Returns the index of the next free entry in "_entries".
-  short alloc_entry();
+  int alloc_entry();
   // Declares the entry "fi" to be free.  (It must have already been
   // deleted from any bucket lists.)
-  void free_entry(short fi);
+  void free_entry(int fi);
 
 public:
   RSHashTable(size_t capacity);
@@ -138,12 +138,12 @@
   // Otherwise, returns "false" to indicate that the addition would
   // overflow the entry for the region.  The caller must transfer these
   // entries to a larger-capacity representation.
-  bool add_card(short region_id, short card_index);
+  bool add_card(RegionIdx_t region_id, CardIdx_t card_index);
 
-  bool get_cards(short region_id, short* cards);
-  bool delete_entry(short region_id);
+  bool get_cards(RegionIdx_t region_id, CardIdx_t* cards);
+  bool delete_entry(RegionIdx_t region_id);
 
-  bool contains_card(short region_id, short card_index) const;
+  bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const;
 
   void add_entry(SparsePRTEntry* e);
 
@@ -162,51 +162,49 @@
 
   static void add_to_deleted_list(RSHashTable* rsht);
   static RSHashTable* get_from_deleted_list();
-
-
 };
 
-  // ValueObj because will be embedded in HRRS iterator.
+// ValueObj because it will be embedded in HRRS iterator.
 class RSHashTableIter VALUE_OBJ_CLASS_SPEC {
-    short _tbl_ind;
-    short _bl_ind;
-    short _card_ind;
-    RSHashTable* _rsht;
-    size_t _heap_bot_card_ind;
+  int _tbl_ind;         // [-1, 0.._rsht->_capacity)
+  int _bl_ind;          // [-1, 0.._rsht->_capacity)
+  short _card_ind;      // [0..CardsPerEntry)
+  RSHashTable* _rsht;
+  size_t _heap_bot_card_ind;
 
-    enum SomePrivateConstants {
-      CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
-    };
+  enum SomePrivateConstants {
+    CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
+  };
+
+  // If the bucket list pointed to by _bl_ind contains a card, sets
+  // _bl_ind to the index of that entry, and returns the card.
+  // Otherwise, returns SparsePRTEntry::NullEntry.
+  CardIdx_t find_first_card_in_list();
 
-    // If the bucket list pointed to by _bl_ind contains a card, sets
-    // _bl_ind to the index of that entry, and returns the card.
-    // Otherwise, returns SparseEntry::NullEnty.
-    short find_first_card_in_list();
-    // Computes the proper card index for the card whose offset in the
-    // current region (as indicated by _bl_ind) is "ci".
-    // This is subject to errors when there is iteration concurrent with
-    // modification, but these errors should be benign.
-    size_t compute_card_ind(short ci);
+  // Computes the proper card index for the card whose offset in the
+  // current region (as indicated by _bl_ind) is "ci".
+  // This is subject to errors when there is iteration concurrent with
+  // modification, but these errors should be benign.
+  size_t compute_card_ind(CardIdx_t ci);
 
-  public:
-    RSHashTableIter(size_t heap_bot_card_ind) :
-      _tbl_ind(RSHashTable::NullEntry),
-      _bl_ind(RSHashTable::NullEntry),
-      _card_ind((SparsePRTEntry::CardsPerEntry-1)),
-      _rsht(NULL),
-      _heap_bot_card_ind(heap_bot_card_ind)
-    {}
+public:
+  RSHashTableIter(size_t heap_bot_card_ind) :
+    _tbl_ind(RSHashTable::NullEntry),
+    _bl_ind(RSHashTable::NullEntry),
+    _card_ind((SparsePRTEntry::CardsPerEntry-1)),
+    _rsht(NULL),
+    _heap_bot_card_ind(heap_bot_card_ind)
+  {}
 
-    void init(RSHashTable* rsht) {
-      _rsht = rsht;
-      _tbl_ind = -1; // So that first increment gets to 0.
-      _bl_ind = RSHashTable::NullEntry;
-      _card_ind = (SparsePRTEntry::CardsPerEntry-1);
-    }
+  void init(RSHashTable* rsht) {
+    _rsht = rsht;
+    _tbl_ind = -1; // So that first increment gets to 0.
+    _bl_ind = RSHashTable::NullEntry;
+    _card_ind = (SparsePRTEntry::CardsPerEntry-1);
+  }
 
-    bool has_next(size_t& card_index);
-
-  };
+  bool has_next(size_t& card_index);
+};
 
 // Concurrent access to a SparsePRT must be serialized by some external
 // mutex.
@@ -238,7 +236,6 @@
   SparsePRT* next_expanded() { return _next_expanded; }
   void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
 
-
   static SparsePRT* _head_expanded_list;
 
 public:
@@ -255,16 +252,16 @@
   // Otherwise, returns "false" to indicate that the addition would
   // overflow the entry for the region.  The caller must transfer these
   // entries to a larger-capacity representation.
-  bool add_card(short region_id, short card_index);
+  bool add_card(RegionIdx_t region_id, CardIdx_t card_index);
 
   // If the table holds an entry for "region_ind", copies its
   // cards into "cards", which must be an array of length at least
   // "CardsPerEntry", and returns "true"; otherwise, returns "false".
-  bool get_cards(short region_ind, short* cards);
+  bool get_cards(RegionIdx_t region_ind, CardIdx_t* cards);
 
   // If there is an entry for "region_ind", removes it and returns "true";
   // otherwise returns "false."
-  bool delete_entry(short region_ind);
+  bool delete_entry(RegionIdx_t region_ind);
 
   // Clear the table, and reinitialize to initial capacity.
   void clear();
@@ -276,13 +273,12 @@
   static void cleanup_all();
   RSHashTable* cur() const { return _cur; }
 
-
   void init_iterator(SparsePRTIter* sprt_iter);
 
   static void add_to_expanded_list(SparsePRT* sprt);
   static SparsePRT* get_from_expanded_list();
 
-  bool contains_card(short region_id, short card_index) const {
+  bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const {
     return _next->contains_card(region_id, card_index);
   }
 
--- a/src/share/vm/gc_implementation/includeDB_gc_g1	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Wed Jul 01 15:06:54 2009 -0700
@@ -49,6 +49,7 @@
 
 concurrentG1Refine.hpp			globalDefinitions.hpp
 concurrentG1Refine.hpp			allocation.hpp
+concurrentG1Refine.hpp			thread.hpp
 
 concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
 concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
@@ -280,6 +281,7 @@
 
 heapRegionRemSet.cpp                    allocation.hpp
 heapRegionRemSet.cpp                    bitMap.inline.hpp
+heapRegionRemSet.cpp                    concurrentG1Refine.hpp
 heapRegionRemSet.cpp                    g1BlockOffsetTable.inline.hpp
 heapRegionRemSet.cpp                    g1CollectedHeap.inline.hpp
 heapRegionRemSet.cpp                    heapRegionRemSet.hpp
@@ -331,6 +333,7 @@
 sparsePRT.hpp				allocation.hpp
 sparsePRT.hpp				cardTableModRefBS.hpp
 sparsePRT.hpp				globalDefinitions.hpp
+sparsePRT.hpp                           g1CollectedHeap.inline.hpp
 sparsePRT.hpp				heapRegion.hpp
 sparsePRT.hpp				mutex.hpp
 
--- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -177,6 +177,7 @@
   // are double-word aligned in 32-bit VMs, but not in 64-bit VMs, so the 32-bit
   // granularity is 2, 64-bit is 1.
   static inline size_t obj_granularity() { return size_t(MinObjAlignment); }
+  static inline int obj_granularity_shift() { return LogMinObjAlignment; }
 
   HeapWord*       _region_start;
   size_t          _region_size;
@@ -299,13 +300,13 @@
 inline size_t
 ParMarkBitMap::bits_to_words(idx_t bits)
 {
-  return bits * obj_granularity();
+  return bits << obj_granularity_shift();
 }
 
 inline ParMarkBitMap::idx_t
 ParMarkBitMap::words_to_bits(size_t words)
 {
-  return words / obj_granularity();
+  return words >> obj_granularity_shift();
 }
 
 inline size_t ParMarkBitMap::obj_size(idx_t beg_bit, idx_t end_bit) const
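Since obj_granularity() is a power of two (it is MinObjAlignment), the multiply and divide strength-reduce to shifts by the new obj_granularity_shift(). A worked example, assuming a 32-bit VM where MinObjAlignment == 2 and LogMinObjAlignment == 1:

    // bits_to_words(3) == 3 << 1 == 6 == 3 * obj_granularity()
    // words_to_bits(6) == 6 >> 1 == 3 == 6 / obj_granularity()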
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -27,13 +27,12 @@
 # include "incls/_precompiled.incl"
 # include "incls/_concurrentGCThread.cpp.incl"
 
-bool ConcurrentGCThread::_should_terminate    = false;
-bool ConcurrentGCThread::_has_terminated      = false;
 int  ConcurrentGCThread::_CGC_flag            = CGC_nil;
 
 SuspendibleThreadSet ConcurrentGCThread::_sts;
 
-ConcurrentGCThread::ConcurrentGCThread() {
+ConcurrentGCThread::ConcurrentGCThread() :
+  _should_terminate(false), _has_terminated(false) {
   _sts.initialize();
 };
 
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -72,8 +72,8 @@
   friend class VMStructs;
 
 protected:
-  static bool _should_terminate;
-  static bool _has_terminated;
+  bool _should_terminate;
+  bool _has_terminated;
 
   enum CGC_flag_type {
     CGC_nil           = 0x0,
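Making _should_terminate and _has_terminated per-instance rather than static is what allows several concurrent GC threads of the same kind to coexist: with static flags, terminating one ConcurrentG1RefineThread would have flagged them all. A hedged sketch of the shutdown this enables; the stop() method shown is hypothetical, since only the flags appear in this hunk:

    void stop_all(ConcurrentG1RefineThread** threads, int n) {
      for (int i = 0; i < n; i++) {
        threads[i]->stop();   // sets only that thread's own _should_terminate
      }
    }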
--- a/src/share/vm/includeDB_compiler1	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/includeDB_compiler1	Wed Jul 01 15:06:54 2009 -0700
@@ -387,7 +387,7 @@
 c1_ValueSet.cpp                         c1_ValueSet.hpp
 
 c1_ValueSet.hpp                         allocation.hpp
-c1_ValueSet.hpp                         bitMap.hpp
+c1_ValueSet.hpp                         bitMap.inline.hpp
 c1_ValueSet.hpp                         c1_Instruction.hpp
 
 c1_ValueStack.cpp                       c1_IR.hpp
--- a/src/share/vm/memory/cardTableRS.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/memory/cardTableRS.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -33,12 +33,8 @@
 {
 #ifndef SERIALGC
   if (UseG1GC) {
-    if (G1RSBarrierUseQueue) {
-      _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap,
-                                                  max_covered_regions);
-    } else {
-      _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions);
-    }
+    _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap,
+                                                max_covered_regions);
   } else {
     _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
   }
--- a/src/share/vm/memory/gcLocker.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/memory/gcLocker.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -242,6 +242,31 @@
 #endif
 };
 
+// A SkipGCALot object is used to elide the usual effect of gc-a-lot
+// over a section of execution by a thread. Currently, it's used only to
+// prevent re-entrant calls to GC.
+class SkipGCALot : public StackObj {
+  private:
+   bool _saved;
+   Thread* _t;
+
+  public:
+#ifdef ASSERT
+    SkipGCALot(Thread* t) : _t(t) {
+      _saved = _t->skip_gcalot();
+      _t->set_skip_gcalot(true);
+    }
+
+    ~SkipGCALot() {
+      assert(_t->skip_gcalot(), "Save-restore protocol invariant");
+      _t->set_skip_gcalot(_saved);
+    }
+#else
+    SkipGCALot(Thread* t) { }
+    ~SkipGCALot() { }
+#endif
+};
+
 // JRT_LEAF currently can be called from either _thread_in_Java or
 // _thread_in_native mode. In _thread_in_native, it is ok
 // for another thread to trigger GC. The rest of the JRT_LEAF
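SkipGCALot is a stack-allocated save/restore guard, so it nests correctly and cannot leak the flag on an early return. Usage mirrors the vmThread.cpp call site later in this change; the enclosing function here is illustrative:

    void do_vm_work(Thread* t) {
      SkipGCALot sgcalot(t);   // gc-a-lot elided from here on
      // ... code that must not recursively trigger gc-a-lot ...
    }                          // destructor restores the previous value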
--- a/src/share/vm/runtime/interfaceSupport.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/runtime/interfaceSupport.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -66,11 +66,14 @@
 
 void InterfaceSupport::gc_alot() {
   Thread *thread = Thread::current();
-  if (thread->is_VM_thread()) return; // Avoid concurrent calls
+  if (!thread->is_Java_thread()) return; // Avoid concurrent calls
   // Check for new, not quite initialized thread. A thread in new mode cannot initiate a GC.
   JavaThread *current_thread = (JavaThread *)thread;
   if (current_thread->active_handles() == NULL) return;
 
+  // Short-circuit any possible re-entrant gc-a-lot attempt
+  if (thread->skip_gcalot()) return;
+
   if (is_init_completed()) {
 
     if (++_fullgc_alot_invocation < FullGCALotStart) {
--- a/src/share/vm/runtime/mutexLocker.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/runtime/mutexLocker.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -70,7 +70,6 @@
 Monitor* CMark_lock                   = NULL;
 Monitor* ZF_mon                       = NULL;
 Monitor* Cleanup_mon                  = NULL;
-Monitor* G1ConcRefine_mon             = NULL;
 Mutex*   SATB_Q_FL_lock               = NULL;
 Monitor* SATB_Q_CBL_mon               = NULL;
 Mutex*   Shared_SATB_Q_lock           = NULL;
@@ -168,7 +167,6 @@
     def(CMark_lock                 , Monitor, nonleaf,     true ); // coordinate concurrent mark thread
     def(ZF_mon                     , Monitor, leaf,        true );
     def(Cleanup_mon                , Monitor, nonleaf,     true );
-    def(G1ConcRefine_mon           , Monitor, nonleaf,     true );
     def(SATB_Q_FL_lock             , Mutex  , special,     true );
     def(SATB_Q_CBL_mon             , Monitor, nonleaf,     true );
     def(Shared_SATB_Q_lock         , Mutex,   nonleaf,     true );
--- a/src/share/vm/runtime/mutexLocker.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/runtime/mutexLocker.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -63,9 +63,6 @@
 extern Monitor* CMark_lock;                      // used for concurrent mark thread coordination
 extern Monitor* ZF_mon;                          // used for G1 conc zero-fill.
 extern Monitor* Cleanup_mon;                     // used for G1 conc cleanup.
-extern Monitor* G1ConcRefine_mon;                // used for G1 conc-refine
-                                                 // coordination.
-
 extern Mutex*   SATB_Q_FL_lock;                  // Protects SATB Q
                                                  // buffer free list.
 extern Monitor* SATB_Q_CBL_mon;                  // Protects SATB Q
--- a/src/share/vm/runtime/thread.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/runtime/thread.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -127,6 +127,7 @@
   debug_only(_owned_locks = NULL;)
   debug_only(_allow_allocation_count = 0;)
   NOT_PRODUCT(_allow_safepoint_count = 0;)
+  NOT_PRODUCT(_skip_gcalot = false;)
   CHECK_UNHANDLED_OOPS_ONLY(_gc_locked_out_count = 0;)
   _jvmti_env_iteration_count = 0;
   _vm_operation_started_count = 0;
@@ -784,7 +785,6 @@
       // We could enter a safepoint here and thus have a gc
       InterfaceSupport::check_gc_alot();
     }
-
 #endif
 }
 #endif
--- a/src/share/vm/runtime/thread.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/runtime/thread.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -191,6 +191,9 @@
   NOT_PRODUCT(int _allow_safepoint_count;)       // If 0, the thread allows a safepoint to happen
   debug_only (int _allow_allocation_count;)      // If 0, the thread is allowed to allocate oops.
 
+  // Used by SkipGCALot class.
+  NOT_PRODUCT(bool _skip_gcalot;)                // Should we elide gc-a-lot?
+
   // Record when GC is locked out via the GC_locker mechanism
   CHECK_UNHANDLED_OOPS_ONLY(int _gc_locked_out_count;)
 
@@ -308,6 +311,11 @@
   bool is_gc_locked_out() { return _gc_locked_out_count > 0; }
 #endif // CHECK_UNHANDLED_OOPS
 
+#ifndef PRODUCT
+  bool skip_gcalot()           { return _skip_gcalot; }
+  void set_skip_gcalot(bool v) { _skip_gcalot = v;    }
+#endif
+
  public:
   // Installs a pending exception to be inserted later
   static void send_async_exception(oop thread_oop, oop java_throwable);
--- a/src/share/vm/runtime/vmThread.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/runtime/vmThread.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -531,6 +531,7 @@
   Thread* t = Thread::current();
 
   if (!t->is_VM_thread()) {
+    SkipGCALot sgcalot(t);    // avoid re-entrant attempts to gc-a-lot
     // JavaThread or WatcherThread
     t->check_for_valid_safepoint_state(true);
 
--- a/src/share/vm/utilities/bitMap.cpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/utilities/bitMap.cpp	Wed Jul 01 15:06:54 2009 -0700
@@ -41,19 +41,6 @@
   resize(size_in_bits, in_resource_area);
 }
 
-
-void BitMap::verify_index(idx_t index) const {
-    assert(index < _size, "BitMap index out of bounds");
-}
-
-void BitMap::verify_range(idx_t beg_index, idx_t end_index) const {
-#ifdef ASSERT
-    assert(beg_index <= end_index, "BitMap range error");
-    // Note that [0,0) and [size,size) are both valid ranges.
-    if (end_index != _size)  verify_index(end_index);
-#endif
-}
-
 void BitMap::resize(idx_t size_in_bits, bool in_resource_area) {
   assert(size_in_bits >= 0, "just checking");
   idx_t old_size_in_words = size_in_words();
--- a/src/share/vm/utilities/bitMap.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/utilities/bitMap.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -93,10 +93,12 @@
   // The index of the first full word in a range.
   idx_t word_index_round_up(idx_t bit) const;
 
-  // Verification, statistics.
-  void verify_index(idx_t index) const;
-  void verify_range(idx_t beg_index, idx_t end_index) const;
+  // Verification.
+  inline void verify_index(idx_t index) const NOT_DEBUG_RETURN;
+  inline void verify_range(idx_t beg_index, idx_t end_index) const
+    NOT_DEBUG_RETURN;
 
+  // Statistics.
   static idx_t* _pop_count_table;
   static void init_pop_count_table();
   static idx_t num_set_bits(bm_word_t w);
@@ -287,7 +289,6 @@
 #endif
 };
 
-
 // Convenience class wrapping BitMap which provides multiple bits per slot.
 class BitMap2D VALUE_OBJ_CLASS_SPEC {
  public:
--- a/src/share/vm/utilities/bitMap.inline.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/utilities/bitMap.inline.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -22,6 +22,17 @@
  *
  */
 
+#ifdef ASSERT
+inline void BitMap::verify_index(idx_t index) const {
+  assert(index < _size, "BitMap index out of bounds");
+}
+
+inline void BitMap::verify_range(idx_t beg_index, idx_t end_index) const {
+  assert(beg_index <= end_index, "BitMap range error");
+  // Note that [0,0) and [size,size) are both valid ranges.
+  if (end_index != _size) verify_index(end_index);
+}
+#endif // #ifdef ASSERT
 
 inline void BitMap::set_bit(idx_t bit) {
   verify_index(bit);
--- a/src/share/vm/utilities/macros.hpp	Wed Jul 01 12:22:23 2009 -0700
+++ b/src/share/vm/utilities/macros.hpp	Wed Jul 01 15:06:54 2009 -0700
@@ -106,11 +106,13 @@
 #ifdef ASSERT
 #define DEBUG_ONLY(code) code
 #define NOT_DEBUG(code)
+#define NOT_DEBUG_RETURN  /*next token must be ;*/
 // Historical.
 #define debug_only(code) code
 #else // ASSERT
 #define DEBUG_ONLY(code)
 #define NOT_DEBUG(code) code
+#define NOT_DEBUG_RETURN {}
 #define debug_only(code)
 #endif // ASSERT
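NOT_DEBUG_RETURN is what lets bitMap.hpp keep verify_index and verify_range declared inline without any cost in product builds. Expanding the declaration by hand shows both shapes:

    // bitMap.hpp source:
    //   inline void verify_index(idx_t index) const NOT_DEBUG_RETURN;
    //
    // ASSERT build:  inline void verify_index(idx_t index) const ;
    //                (declaration only; the body lives in bitMap.inline.hpp)
    // product build: inline void verify_index(idx_t index) const {};
    //                (empty inline body; calls compile away entirely)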