# HG changeset patch
# User jcoomes
# Date 1319841376 25200
# Node ID 6534482ff68ad79066dfe15dfb6d8905f09681bd
# Parent  02fe430d493e72bfcdecf85849171a974ea08424
# Parent  8487c835efbfd2ff42f829726f46f7fc1ea909b5
Merge

diff -r 02fe430d493e -r 6534482ff68a make/hotspot_version
--- a/make/hotspot_version Thu Oct 27 13:54:31 2011 -0700
+++ b/make/hotspot_version Fri Oct 28 15:36:16 2011 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=23
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=03
+HS_BUILD_NUMBER=04
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -62,7 +62,7 @@
   MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) *
                  MinObjAlignment;
   assert(IndexSetStart == 0 && IndexSetStride == 0, "already set");
-  IndexSetStart  = MinObjAlignment;
+  IndexSetStart  = (int) MinChunkSize;
   IndexSetStride = MinObjAlignment;
 }
 
@@ -138,7 +138,7 @@
   } else {
     _fitStrategy = FreeBlockStrategyNone;
   }
-  checkFreeListConsistency();
+  check_free_list_consistency();
 
   // Initialize locks for parallel case.
 
@@ -1358,17 +1358,29 @@
   ShouldNotReachHere();
 }
 
-bool CompactibleFreeListSpace::verifyChunkInIndexedFreeLists(FreeChunk* fc)
-  const {
+bool CompactibleFreeListSpace::verifyChunkInIndexedFreeLists(FreeChunk* fc) const {
   assert(fc->size() < IndexSetSize, "Size of chunk is too large");
   return _indexedFreeList[fc->size()].verifyChunkInFreeLists(fc);
 }
 
+bool CompactibleFreeListSpace::verify_chunk_is_linear_alloc_block(FreeChunk* fc) const {
+  assert((_smallLinearAllocBlock._ptr != (HeapWord*)fc) ||
+         (_smallLinearAllocBlock._word_size == fc->size()),
+         "Linear allocation block shows incorrect size");
+  return ((_smallLinearAllocBlock._ptr == (HeapWord*)fc) &&
+          (_smallLinearAllocBlock._word_size == fc->size()));
+}
+
+// Check if the purported free chunk is present either as a linear
+// allocation block, the size-indexed table of (smaller) free blocks,
+// or the larger free blocks kept in the binary tree dictionary.
 bool CompactibleFreeListSpace::verifyChunkInFreeLists(FreeChunk* fc) const {
-  if (fc->size() >= IndexSetSize) {
+  if (verify_chunk_is_linear_alloc_block(fc)) {
+    return true;
+  } else if (fc->size() < IndexSetSize) {
+    return verifyChunkInIndexedFreeLists(fc);
+  } else {
     return dictionary()->verifyChunkInFreeLists(fc);
-  } else {
-    return verifyChunkInIndexedFreeLists(fc);
   }
 }
 
@@ -2495,7 +2507,8 @@
   FreeChunk* tail = _indexedFreeList[size].tail();
   size_t num = _indexedFreeList[size].count();
   size_t n = 0;
-  guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
+  guarantee(((size >= MinChunkSize) && (size % IndexSetStride == 0)) || fc == NULL,
+            "Slot should have been empty");
   for (; fc != NULL; fc = fc->next(), n++) {
     guarantee(fc->size() == size, "Size inconsistency");
     guarantee(fc->isFree(), "!free?");
@@ -2506,14 +2519,14 @@
 }
 
 #ifndef PRODUCT
-void CompactibleFreeListSpace::checkFreeListConsistency() const {
+void CompactibleFreeListSpace::check_free_list_consistency() const {
   assert(_dictionary->minSize() <= IndexSetSize,
          "Some sizes can't be allocated without recourse to"
          " linear allocation buffers");
   assert(MIN_TREE_CHUNK_SIZE*HeapWordSize == sizeof(TreeChunk),
         "else MIN_TREE_CHUNK_SIZE is wrong");
-  assert((IndexSetStride == 2 && IndexSetStart == 2) ||
-         (IndexSetStride == 1 && IndexSetStart == 1), "just checking");
+  assert((IndexSetStride == 2 && IndexSetStart == 4) ||                   // 32-bit
+         (IndexSetStride == 1 && IndexSetStart == 3), "just checking");   // 64-bit
   assert((IndexSetStride != 2) || (MinChunkSize % 2 == 0),
         "Some for-loops may be incorrectly initialized");
   assert((IndexSetStride != 2) || (IndexSetSize % 2 == 1),
@@ -2688,33 +2701,27 @@
   }
 }
 
+// If this is changed in the future to allow parallel
+// access, one would need to take the FL locks and,
+// depending on how it is used, stagger access from
+// parallel threads to reduce contention.
 void CFLS_LAB::retire(int tid) {
   // We run this single threaded with the world stopped;
   // so no need for locks and such.
-#define CFLS_LAB_PARALLEL_ACCESS 0
   NOT_PRODUCT(Thread* t = Thread::current();)
   assert(Thread::current()->is_VM_thread(), "Error");
-  assert(CompactibleFreeListSpace::IndexSetStart == CompactibleFreeListSpace::IndexSetStride,
-         "Will access to uninitialized slot below");
-#if CFLS_LAB_PARALLEL_ACCESS
-  for (size_t i = CompactibleFreeListSpace::IndexSetSize - 1;
-       i > 0;
-       i -= CompactibleFreeListSpace::IndexSetStride) {
-#else // CFLS_LAB_PARALLEL_ACCESS
   for (size_t i = CompactibleFreeListSpace::IndexSetStart;
       i < CompactibleFreeListSpace::IndexSetSize;
       i += CompactibleFreeListSpace::IndexSetStride) {
-#endif // !CFLS_LAB_PARALLEL_ACCESS
    assert(_num_blocks[i] >= (size_t)_indexedFreeList[i].count(),
           "Can't retire more than what we obtained");
    if (_num_blocks[i] > 0) {
      size_t num_retire = _indexedFreeList[i].count();
      assert(_num_blocks[i] > num_retire, "Should have used at least one");
      {
-#if CFLS_LAB_PARALLEL_ACCESS
-        MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
-                        Mutex::_no_safepoint_check_flag);
-#endif // CFLS_LAB_PARALLEL_ACCESS
+        // MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
+        //                 Mutex::_no_safepoint_check_flag);
+
        // Update globals stats for num_blocks used
        _global_num_blocks[i] += (_num_blocks[i] - num_retire);
        _global_num_workers[i]++;
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -502,10 +502,14 @@
   void verifyFreeLists()             const PRODUCT_RETURN;
   void verifyIndexedFreeLists()      const;
   void verifyIndexedFreeList(size_t size) const;
-  // verify that the given chunk is in the free lists.
+  // Verify that the given chunk is in the free lists:
+  // i.e. either the binary tree dictionary, the indexed free lists
+  // or the linear allocation block.
   bool verifyChunkInFreeLists(FreeChunk* fc) const;
+  // Verify that the given chunk is the linear allocation block
+  bool verify_chunk_is_linear_alloc_block(FreeChunk* fc) const;
   // Do some basic checks on the the free lists.
-  void checkFreeListConsistency()    const PRODUCT_RETURN;
+  void check_free_list_consistency() const PRODUCT_RETURN;
 
   // Printing support
   void dump_at_safepoint_with_locks(CMSCollector* c, outputStream* st);
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -147,12 +147,8 @@
       }
     }
   } while (cm()->restart_for_overflow());
+
   double counting_start_time = os::elapsedVTime();
-
-  // YSR: These look dubious (i.e. redundant) !!! FIX ME
-  slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
-  slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
-
   if (!cm()->has_aborted()) {
     double count_start_sec = os::elapsedTime();
     if (PrintGC) {
@@ -175,6 +171,7 @@
       }
     }
   }
+
   double end_time = os::elapsedVTime();
   _vtime_count_accum += (end_time - counting_start_time);
   // Update the total virtual time before doing this, since it will try
@@ -335,13 +332,15 @@
   clear_started();
 }
 
-// Note: this method, although exported by the ConcurrentMarkSweepThread,
-// which is a non-JavaThread, can only be called by a JavaThread.
-// Currently this is done at vm creation time (post-vm-init) by the
-// main/Primordial (Java)Thread.
-// XXX Consider changing this in the future to allow the CMS thread
+// Note: As is the case with CMS - this method, although exported
+// by the ConcurrentMarkThread, which is a non-JavaThread, can only
+// be called by a JavaThread. Currently this is done at vm creation
+// time (post-vm-init) by the main/Primordial (Java)Thread.
+// XXX Consider changing this in the future to allow the CM thread
 // itself to create this thread?
 void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
+  assert(UseG1GC, "SLT thread needed only for concurrent GC");
+  assert(THREAD->is_Java_thread(), "must be a Java thread");
   assert(_slt == NULL, "SLT already created");
   _slt = SurrogateLockerThread::make(THREAD);
 }
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -5502,34 +5502,36 @@
   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
   double start = os::elapsedTime();
 
-  // Iterate over the dirty cards region list.
-  G1ParCleanupCTTask cleanup_task(ct_bs, this);
-
-  if (ParallelGCThreads > 0) {
-    set_par_threads(workers()->total_workers());
-    workers()->run_task(&cleanup_task);
-    set_par_threads(0);
-  } else {
-    while (_dirty_cards_region_list) {
-      HeapRegion* r = _dirty_cards_region_list;
-      cleanup_task.clear_cards(r);
-      _dirty_cards_region_list = r->get_next_dirty_cards_region();
-      if (_dirty_cards_region_list == r) {
-        // The last region.
-        _dirty_cards_region_list = NULL;
+  {
+    // Iterate over the dirty cards region list.
+    G1ParCleanupCTTask cleanup_task(ct_bs, this);
+
+    if (ParallelGCThreads > 0) {
+      set_par_threads(workers()->total_workers());
+      workers()->run_task(&cleanup_task);
+      set_par_threads(0);
+    } else {
+      while (_dirty_cards_region_list) {
+        HeapRegion* r = _dirty_cards_region_list;
+        cleanup_task.clear_cards(r);
+        _dirty_cards_region_list = r->get_next_dirty_cards_region();
+        if (_dirty_cards_region_list == r) {
+          // The last region.
+          _dirty_cards_region_list = NULL;
+        }
+        r->set_next_dirty_cards_region(NULL);
       }
-      r->set_next_dirty_cards_region(NULL);
     }
+#ifndef PRODUCT
+    if (G1VerifyCTCleanup || VerifyAfterGC) {
+      G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
+      heap_region_iterate(&cleanup_verifier);
+    }
+#endif
   }
 
   double elapsed = os::elapsedTime() - start;
   g1_policy()->record_clear_ct_time(elapsed * 1000.0);
-#ifndef PRODUCT
-  if (G1VerifyCTCleanup || VerifyAfterGC) {
-    G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
-    heap_region_iterate(&cleanup_verifier);
-  }
-#endif
 }
 
 void G1CollectedHeap::free_collection_set(HeapRegion* cs_head) {
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -320,6 +320,7 @@
   _par_last_termination_attempts = new double[_parallel_gc_threads];
   _par_last_gc_worker_end_times_ms = new double[_parallel_gc_threads];
   _par_last_gc_worker_times_ms = new double[_parallel_gc_threads];
+  _par_last_gc_worker_other_times_ms = new double[_parallel_gc_threads];
 
   // start conservatively
   _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
@@ -497,7 +498,6 @@
   initialize_gc_policy_counters();
 
   G1YoungGenSizer sizer;
-  size_t initial_region_num = sizer.initial_young_region_num();
   _min_desired_young_length = sizer.min_young_region_num();
   _max_desired_young_length = sizer.max_young_region_num();
 
@@ -511,17 +511,14 @@
     }
   }
 
-  // GenCollectorPolicy guarantees that min <= initial <= max.
-  // Asserting here just to state that we rely on this property.
   assert(_min_desired_young_length <= _max_desired_young_length, "Invalid min/max young gen size values");
-  assert(initial_region_num <= _max_desired_young_length, "Initial young gen size too large");
-  assert(_min_desired_young_length <= initial_region_num, "Initial young gen size too small");
 
   set_adaptive_young_list_length(_min_desired_young_length < _max_desired_young_length);
   if (adaptive_young_list_length()) {
     _young_list_fixed_length = 0;
   } else {
-    _young_list_fixed_length = initial_region_num;
+    assert(_min_desired_young_length == _max_desired_young_length, "Min and max young size differ");
+    _young_list_fixed_length = _min_desired_young_length;
   }
   _free_regions_at_end_of_collection = _g1->free_regions();
   update_young_list_target_length();
@@ -976,6 +973,7 @@
     _par_last_termination_attempts[i] = -1234.0;
     _par_last_gc_worker_end_times_ms[i] = -1234.0;
     _par_last_gc_worker_times_ms[i] = -1234.0;
+    _par_last_gc_worker_other_times_ms[i] = -1234.0;
   }
 #endif
 
@@ -984,8 +982,10 @@
     _cur_aux_times_set[i] = false;
   }
 
-  _satb_drain_time_set = false;
-  _last_satb_drain_processed_buffers = -1;
+  // These are initialized to zero here and they are set during
+  // the evacuation pause if marking is in progress.
+  _cur_satb_drain_time_ms = 0.0;
+  _last_satb_drain_processed_buffers = 0;
 
   _last_young_gc_full = false;
 
@@ -1097,61 +1097,65 @@
                 (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min);
 }
 
-void G1CollectorPolicy::print_stats (int level,
-                                     const char* str,
-                                     double value) {
+void G1CollectorPolicy::print_stats(int level,
+                                    const char* str,
+                                    double value) {
   LineBuffer(level).append_and_print_cr("[%s: %5.1lf ms]", str, value);
 }
 
-void G1CollectorPolicy::print_stats (int level,
-                                     const char* str,
-                                     int value) {
+void G1CollectorPolicy::print_stats(int level,
+                                    const char* str,
+                                    int value) {
   LineBuffer(level).append_and_print_cr("[%s: %d]", str, value);
 }
 
-double G1CollectorPolicy::avg_value (double* data) {
+double G1CollectorPolicy::avg_value(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double ret = 0.0;
-    for (uint i = 0; i < ParallelGCThreads; ++i)
+    for (uint i = 0; i < ParallelGCThreads; ++i) {
       ret += data[i];
+    }
     return ret / (double) ParallelGCThreads;
   } else {
     return data[0];
   }
 }
 
-double G1CollectorPolicy::max_value (double* data) {
+double G1CollectorPolicy::max_value(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double ret = data[0];
-    for (uint i = 1; i < ParallelGCThreads; ++i)
-      if (data[i] > ret)
+    for (uint i = 1; i < ParallelGCThreads; ++i) {
+      if (data[i] > ret) {
         ret = data[i];
+      }
+    }
    return ret;
   } else {
     return data[0];
   }
 }
 
-double G1CollectorPolicy::sum_of_values (double* data) {
+double G1CollectorPolicy::sum_of_values(double* data) {
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     double sum = 0.0;
-    for (uint i = 0; i < ParallelGCThreads; i++)
+    for (uint i = 0; i < ParallelGCThreads; i++) {
       sum += data[i];
+    }
     return sum;
   } else {
     return data[0];
   }
 }
 
-double G1CollectorPolicy::max_sum (double* data1,
-                                   double* data2) {
+double G1CollectorPolicy::max_sum(double* data1, double* data2) {
   double ret = data1[0] + data2[0];
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 1; i < ParallelGCThreads; ++i) {
       double data = data1[i] + data2[i];
-      if (data > ret)
+      if (data > ret) {
         ret = data;
+      }
     }
   }
   return ret;
@@ -1251,6 +1255,10 @@
 
   _n_pauses++;
 
+  // These values are used to update the summary information that is
+  // displayed when TraceGen0Time is enabled, and are output as part
+  // of the PrintGCDetails output, in the non-parallel case.
+
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
   double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
   double update_rs_time = avg_value(_par_last_update_rs_times_ms);
@@ -1260,42 +1268,68 @@
   double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
   double termination_time = avg_value(_par_last_termination_times_ms);
 
-  double parallel_known_time = update_rs_time +
-                               ext_root_scan_time +
-                               mark_stack_scan_time +
-                               scan_rs_time +
-                               obj_copy_time +
-                               termination_time;
-
-  double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
-
-  PauseSummary* summary = _summary;
+  double known_time = ext_root_scan_time +
+                      mark_stack_scan_time +
+                      update_rs_time +
+                      scan_rs_time +
+                      obj_copy_time;
+
+  double other_time_ms = elapsed_ms;
+
+  // Subtract the SATB drain time. It's initialized to zero at the
+  // start of the pause and is updated during the pause if marking
+  // is in progress.
+  other_time_ms -= _cur_satb_drain_time_ms;
+
+  if (parallel) {
+    other_time_ms -= _cur_collection_par_time_ms;
+  } else {
+    other_time_ms -= known_time;
+  }
+
+  // Subtract the time taken to clean the card table from the
+  // current value of "other time"
+  other_time_ms -= _cur_clear_ct_time_ms;
+
+  // TraceGen0Time and TraceGen1Time summary info updating.
+  _all_pause_times_ms->add(elapsed_ms);
 
   if (update_stats) {
     _recent_rs_scan_times_ms->add(scan_rs_time);
     _recent_pause_times_ms->add(elapsed_ms);
     _recent_rs_sizes->add(rs_size);
 
-    MainBodySummary* body_summary = summary->main_body_summary();
-    guarantee(body_summary != NULL, "should not be null!");
-
-    if (_satb_drain_time_set)
-      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
-    else
-      body_summary->record_satb_drain_time_ms(0.0);
+    _summary->record_total_time_ms(elapsed_ms);
+    _summary->record_other_time_ms(other_time_ms);
+
+    MainBodySummary* body_summary = _summary->main_body_summary();
+    assert(body_summary != NULL, "should not be null!");
+
+    // This will be non-zero iff marking is currently in progress (i.e.
+    // _g1->mark_in_progress() == true) and the current pause was not
+    // an initial mark pause. Since the body_summary items are NumberSeqs,
+    // however, they have to be consistent and updated in lock-step with
+    // each other. Therefore we unconditionally record the SATB drain
+    // time - even if it's zero.
+    body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
     body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
     body_summary->record_update_rs_time_ms(update_rs_time);
     body_summary->record_scan_rs_time_ms(scan_rs_time);
     body_summary->record_obj_copy_time_ms(obj_copy_time);
+
     if (parallel) {
       body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
-      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
       body_summary->record_termination_time_ms(termination_time);
+
+      double parallel_known_time = known_time + termination_time;
+      double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
       body_summary->record_parallel_other_time_ms(parallel_other_time);
     }
+    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
+    body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
 
     // We exempt parallel collection from this check because Alloc Buffer
     // fragmentation can produce negative collections.  Same with evac
@@ -1307,6 +1341,7 @@
            || _g1->evacuation_failed()
            || surviving_bytes <= _collection_set_bytes_used_before,
            "Or else negative collection!");
+
     _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before);
     _recent_CS_bytes_surviving->add(surviving_bytes);
 
@@ -1357,6 +1392,13 @@
     }
   }
 
+  for (int i = 0; i < _aux_num; ++i) {
+    if (_cur_aux_times_set[i]) {
+      _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
+    }
+  }
+
+
   if (G1PolicyVerbose > 1) {
     gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
   }
@@ -1383,61 +1425,60 @@
                            recent_avg_pause_time_ratio() * 100.0);
   }
 
-  double other_time_ms = elapsed_ms;
-
-  if (_satb_drain_time_set) {
-    other_time_ms -= _cur_satb_drain_time_ms;
-  }
-
-  if (parallel) {
-    other_time_ms -= _cur_collection_par_time_ms + _cur_clear_ct_time_ms;
-  } else {
-    other_time_ms -=
-      update_rs_time +
-      ext_root_scan_time + mark_stack_scan_time +
-      scan_rs_time + obj_copy_time;
-  }
-
+  // PrintGCDetails output
   if (PrintGCDetails) {
+    bool print_marking_info =
+      _g1->mark_in_progress() && !last_pause_included_initial_mark;
+
     gclog_or_tty->print_cr("%s, %1.8lf secs]",
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (_satb_drain_time_set) {
+    if (print_marking_info) {
       print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-    }
-    if (_last_satb_drain_processed_buffers >= 0) {
       print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
     }
+
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
-      print_par_stats(2, "GC Worker Start Time", _par_last_gc_worker_start_times_ms);
+      print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
+      print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
+      if (print_marking_info) {
+        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+      }
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
       print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
-      print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
-      print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
       print_par_stats(2, "Scan RS", _par_last_scan_rs_times_ms);
       print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
       print_par_stats(2, "Termination", _par_last_termination_times_ms);
       print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
-      print_par_stats(2, "GC Worker End Time", _par_last_gc_worker_end_times_ms);
+      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
 
       for (int i = 0; i < _parallel_gc_threads; i++) {
         _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
+
+        double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
+                                   _par_last_mark_stack_scan_times_ms[i] +
+                                   _par_last_update_rs_times_ms[i] +
+                                   _par_last_scan_rs_times_ms[i] +
+                                   _par_last_obj_copy_times_ms[i] +
+                                   _par_last_termination_times_ms[i];
+
+        _par_last_gc_worker_other_times_ms[i] = _cur_collection_par_time_ms - worker_known_time;
       }
-      print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms);
-
-      print_stats(2, "Parallel Other", parallel_other_time);
-      print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
+      print_par_stats(2, "GC Worker", _par_last_gc_worker_times_ms);
+      print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
     } else {
+      print_stats(1, "Ext Root Scanning", ext_root_scan_time);
+      if (print_marking_info) {
+        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+      }
       print_stats(1, "Update RS", update_rs_time);
-      print_stats(2, "Processed Buffers",
-                  (int)update_rs_processed_buffers);
-      print_stats(1, "Ext Root Scanning", ext_root_scan_time);
-      print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+      print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
       print_stats(1, "Scan RS", scan_rs_time);
       print_stats(1, "Object Copying", obj_copy_time);
     }
+    print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
 #ifndef PRODUCT
     print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
     print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
@@ -1461,16 +1502,6 @@
     }
   }
 
-  _all_pause_times_ms->add(elapsed_ms);
-  if (update_stats) {
-    summary->record_total_time_ms(elapsed_ms);
-    summary->record_other_time_ms(other_time_ms);
-  }
-  for (int i = 0; i < _aux_num; ++i)
-    if (_cur_aux_times_set[i]) {
-      _all_aux_times_ms[i].add(_cur_aux_times_ms[i]);
-    }
-
   // Update the efficiency-since-mark vars.
   double proc_ms = elapsed_ms * (double) _parallel_gc_threads;
   if (elapsed_ms < MIN_TIMER_GRANULARITY) {
@@ -2138,17 +2169,17 @@
   _g1->collection_set_iterate(&cs_closure);
 }
 
-void G1CollectorPolicy::print_summary (int level,
-                                       const char* str,
-                                       NumberSeq* seq) const {
+void G1CollectorPolicy::print_summary(int level,
+                                      const char* str,
+                                      NumberSeq* seq) const {
   double sum = seq->sum();
   LineBuffer(level + 1).append_and_print_cr("%-24s = %8.2lf s (avg = %8.2lf ms)",
                 str, sum / 1000.0, seq->avg());
 }
 
-void G1CollectorPolicy::print_summary_sd (int level,
-                                          const char* str,
-                                          NumberSeq* seq) const {
+void G1CollectorPolicy::print_summary_sd(int level,
+                                         const char* str,
+                                         NumberSeq* seq) const {
   print_summary(level, str, seq);
   LineBuffer(level + 6).append_and_print_cr("(num = %5d, std dev = %8.2lf ms, max = %8.2lf ms)",
                 seq->num(), seq->sd(), seq->maximum());
@@ -2211,20 +2242,18 @@
         print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
         if (parallel) {
           print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
+          print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
+          print_summary(2, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
           print_summary(2, "Update RS", body_summary->get_update_rs_seq());
-          print_summary(2, "Ext Root Scanning",
-                        body_summary->get_ext_root_scan_seq());
-          print_summary(2, "Mark Stack Scanning",
-                        body_summary->get_mark_stack_scan_seq());
           print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
           print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
           print_summary(2, "Termination", body_summary->get_termination_seq());
-          print_summary(2, "Other", body_summary->get_parallel_other_seq());
+          print_summary(2, "Parallel Other", body_summary->get_parallel_other_seq());
           {
             NumberSeq* other_parts[] = {
-              body_summary->get_update_rs_seq(),
               body_summary->get_ext_root_scan_seq(),
               body_summary->get_mark_stack_scan_seq(),
+              body_summary->get_update_rs_seq(),
               body_summary->get_scan_rs_seq(),
               body_summary->get_obj_copy_seq(),
               body_summary->get_termination_seq()
@@ -2234,18 +2263,16 @@
             check_other_times(2, body_summary->get_parallel_other_seq(),
                               &calc_other_times_ms);
           }
-          print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq());
-          print_summary(1, "Clear CT", body_summary->get_clear_ct_seq());
         } else {
+          print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
+          print_summary(1, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
           print_summary(1, "Update RS", body_summary->get_update_rs_seq());
-          print_summary(1, "Ext Root Scanning",
-                        body_summary->get_ext_root_scan_seq());
-          print_summary(1, "Mark Stack Scanning",
-                        body_summary->get_mark_stack_scan_seq());
           print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
           print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
         }
       }
+      print_summary(1, "Mark Closure", body_summary->get_mark_closure_seq());
+      print_summary(1, "Clear CT", body_summary->get_clear_ct_seq());
       print_summary(1, "Other", summary->get_other_seq());
       {
         if (body_summary != NULL) {
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -74,7 +74,7 @@
   define_num_seq(termination) // parallel only
   define_num_seq(parallel_other) // parallel only
   define_num_seq(mark_closure)
-  define_num_seq(clear_ct)  // parallel only
+  define_num_seq(clear_ct)
 };
 
 class Summary: public PauseSummary,
@@ -115,7 +115,6 @@
   double _cur_collection_par_time_ms;
   double _cur_satb_drain_time_ms;
   double _cur_clear_ct_time_ms;
-  bool   _satb_drain_time_set;
   double _cur_ref_proc_time_ms;
   double _cur_ref_enq_time_ms;
 
@@ -176,6 +175,11 @@
   double* _par_last_gc_worker_end_times_ms;
   double* _par_last_gc_worker_times_ms;
 
+  // Each worker's 'other' time, i.e. the elapsed time of the parallel
+  // phase of the pause minus the sum of the individual sub-phase
+  // times for a given worker thread.
+  double* _par_last_gc_worker_other_times_ms;
+
   // indicates whether we are in full young or partially young GC mode
   bool _full_young_gcs;
 
@@ -892,11 +896,12 @@
   }
 
   void record_satb_drain_time(double ms) {
+    assert(_g1->mark_in_progress(), "shouldn't be here otherwise");
     _cur_satb_drain_time_ms = ms;
-    _satb_drain_time_set    = true;
   }
 
-  void record_satb_drain_processed_buffers (int processed_buffers) {
+  void record_satb_drain_processed_buffers(int processed_buffers) {
+    assert(_g1->mark_in_progress(), "shouldn't be here otherwise");
     _last_satb_drain_processed_buffers = processed_buffers;
   }
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/g1RemSet.cpp
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -122,10 +122,10 @@
   void set_try_claimed() { _try_claimed = true; }
 
   void scanCard(size_t index, HeapRegion *r) {
-    DirtyCardToOopClosure* cl =
-      r->new_dcto_closure(_oc,
-                          CardTableModRefBS::Precise,
-                          HeapRegionDCTOC::IntoCSFilterKind);
+    // Stack allocate the DirtyCardToOopClosure instance
+    HeapRegionDCTOC cl(_g1h, r, _oc,
+                       CardTableModRefBS::Precise,
+                       HeapRegionDCTOC::IntoCSFilterKind);
 
     // Set the "from" region in the closure.
     _oc->set_region(r);
@@ -140,7 +140,7 @@
     // scans (the rsets of the regions in the cset can intersect).
     _ct_bs->set_card_claimed(index);
     _cards_done++;
-    cl->do_MemRegion(mr);
+    cl.do_MemRegion(mr);
   }
 }
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/heapRegion.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -340,14 +340,6 @@
   init_top_at_mark_start();
 }
 
-DirtyCardToOopClosure*
-HeapRegion::new_dcto_closure(OopClosure* cl,
-                             CardTableModRefBS::PrecisionStyle precision,
-                             HeapRegionDCTOC::FilterKind fk) {
-  return new HeapRegionDCTOC(G1CollectedHeap::heap(),
-                             this, cl, precision, fk);
-}
-
 void HeapRegion::hr_clear(bool par, bool clear_space) {
   assert(_humongous_type == NotHumongous,
          "we should have already filtered out humongous regions");
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/heapRegion.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -431,6 +431,14 @@
     return _humongous_start_region;
   }
 
+  // Same as Space::is_in_reserved, but will use the original size of the region.
+  // The original size is different only for start humongous regions. They get
+  // their _end set up to be the end of the last continues region of the
+  // corresponding humongous object.
+  bool is_in_reserved_raw(const void* p) const {
+    return _bottom <= p && p < _orig_end;
+  }
+
   // Makes the current region be a "starts humongous" region, i.e.,
   // the first region in a series of one or more contiguous regions
   // that will contain a single "humongous" object. The two parameters
@@ -569,11 +577,6 @@
   // allocated in the current region before the last call to "save_mark".
   void oop_before_save_marks_iterate(OopClosure* cl);
 
-  DirtyCardToOopClosure*
-  new_dcto_closure(OopClosure* cl,
-                   CardTableModRefBS::PrecisionStyle precision,
-                   HeapRegionDCTOC::FilterKind fk);
-
   // Note the start or end of marking. This tells the heap region
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -143,7 +143,11 @@
     // If the test below fails, then this table was reused concurrently
     // with this operation.  This is OK, since the old table was coarsened,
     // and adding a bit to the new table is never incorrect.
-    if (loc_hr->is_in_reserved(from)) {
+    // If the table used to belong to a continues humongous region and is
+    // now reused for the corresponding start humongous region, we need to
+    // make sure that we detect this. Thus, we call is_in_reserved_raw()
+    // instead of just is_in_reserved() here.
+    if (loc_hr->is_in_reserved_raw(from)) {
       size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom());
       CardIdx_t from_card = (CardIdx_t) hw_offset >> (CardTableModRefBS::card_shift - LogHeapWordSize);
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/vm_operations_g1.cpp
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -165,6 +166,20 @@
   }
 }
 
+void VM_CGC_Operation::acquire_pending_list_lock() {
+  // The caller may block while communicating
+  // with the SLT thread in order to acquire/release the PLL.
+  ConcurrentMarkThread::slt()->
+    manipulatePLL(SurrogateLockerThread::acquirePLL);
+}
+
+void VM_CGC_Operation::release_and_notify_pending_list_lock() {
+  // The caller may block while communicating
+  // with the SLT thread in order to acquire/release the PLL.
+  ConcurrentMarkThread::slt()->
+    manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
+}
+
 void VM_CGC_Operation::doit() {
   gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
   TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
@@ -180,12 +195,19 @@
 }
 
 bool VM_CGC_Operation::doit_prologue() {
+  // Note the relative order of the locks must match that in
+  // VM_GC_Operation::doit_prologue() or deadlocks can occur
+  acquire_pending_list_lock();
+
   Heap_lock->lock();
   SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true;
   return true;
 }
 
 void VM_CGC_Operation::doit_epilogue() {
+  // Note the relative order of the unlocks must match that in
+  // VM_GC_Operation::doit_epilogue()
   SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false;
   Heap_lock->unlock();
+  release_and_notify_pending_list_lock();
 }
 
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp Fri Oct 28 15:36:16 2011 -0700
@@ -93,11 +93,17 @@
   }
 };
 
-// Concurrent GC stop-the-world operations such as initial and final mark;
+// Concurrent GC stop-the-world operations such as remark and cleanup;
 // consider sharing these with CMS's counterparts.
 class VM_CGC_Operation: public VM_Operation {
   VoidClosure* _cl;
   const char*  _printGCMessage;
+
+protected:
+  // java.lang.ref.Reference support
+  void acquire_pending_list_lock();
+  void release_and_notify_pending_list_lock();
+
 public:
   VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg)
     : _cl(cl), _printGCMessage(printGCMsg) { }
diff -r 02fe430d493e -r 6534482ff68a src/share/vm/gc_implementation/shared/concurrentGCThread.cpp
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Thu Oct 27 13:54:31 2011 -0700
+++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Fri Oct 28 15:36:16 2011 -0700
@@ -224,6 +224,8 @@
   MutexLockerEx x(&_monitor, Mutex::_no_safepoint_check_flag);
   assert(_buffer == empty, "Should be empty");
   assert(msg != empty, "empty message");
+  assert(!Heap_lock->owned_by_self(), "Heap_lock owned by requesting thread");
+
   _buffer = msg;
   while (_buffer != empty) {
    _monitor.notify();
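
[Illustration only; not part of the changeset.] The reworked CompactibleFreeListSpace::verifyChunkInFreeLists() in the first hunks above probes three places in a fixed order: the small linear allocation block first, then the size-indexed free lists for chunks smaller than IndexSetSize, and finally the binary tree dictionary for everything larger. The standalone C++ sketch below mirrors only that control flow; the names and types (kIndexSetSize, Chunk, FreeSpace, indexed, dictionary) are simplified stand-ins invented for this note, not HotSpot's actual classes.

#include <cstddef>
#include <map>
#include <set>
#include <vector>

// Hypothetical, simplified model of a free-list space; not HotSpot code.
const std::size_t kIndexSetSize = 257;   // chunks below this size live in the indexed lists

struct Chunk {
  std::size_t size;
};

struct FreeSpace {
  const Chunk* linear_alloc_block;                           // current linear allocation block (may be NULL)
  std::vector< std::set<const Chunk*> > indexed;             // one bucket per small chunk size
  std::map<std::size_t, std::set<const Chunk*> > dictionary; // larger free chunks, keyed by size

  FreeSpace() : linear_alloc_block(NULL), indexed(kIndexSetSize) {}

  // Mirrors the patched lookup order: linear allocation block first,
  // then the size-indexed lists, then the dictionary.
  bool verify_chunk_in_free_lists(const Chunk* fc) const {
    if (fc == linear_alloc_block) {
      return true;
    } else if (fc->size < kIndexSetSize) {
      return indexed[fc->size].count(fc) != 0;
    } else {
      std::map<std::size_t, std::set<const Chunk*> >::const_iterator it =
          dictionary.find(fc->size);
      return it != dictionary.end() && it->second.count(fc) != 0;
    }
  }
};

Checking the linear allocation block before either list mirrors the patched order, presumably because a chunk currently backing linear allocation is not linked on any free list and would otherwise fail verification.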