changeset 1179:3d6016e040d6

Merge
author iveresov
date Wed, 20 Jan 2010 12:54:25 -0800
parents 5b00c9feb9ea (diff) 648fe315b257 (current diff)
children 10f901469941 2718ec34c699 cf0685d550f1 fed17682aea5
files src/share/vm/runtime/globals.hpp
diffstat 45 files changed, 1286 insertions(+), 474 deletions(-)
--- a/.hgtags	Tue Jan 19 15:54:42 2010 -0800
+++ b/.hgtags	Wed Jan 20 12:54:25 2010 -0800
@@ -53,3 +53,4 @@
 9174bb32e934965288121f75394874eeb1fcb649 jdk7-b76
 455105fc81d941482f8f8056afaa7aa0949c9300 jdk7-b77
 e703499b4b51e3af756ae77c3d5e8b3058a14e4e jdk7-b78
+a5a6adfca6ecefb5894a848debabfe442ff50e25 jdk7-b79
--- a/make/hotspot_version	Tue Jan 19 15:54:42 2010 -0800
+++ b/make/hotspot_version	Wed Jan 20 12:54:25 2010 -0800
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=17
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=06
+HS_BUILD_NUMBER=07
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -62,12 +62,13 @@
   tl->link_head(tc);
   tl->link_tail(tc);
   tl->set_count(1);
-  tl->init_statistics();
+  tl->init_statistics(true /* split_birth */);
   tl->setParent(NULL);
   tl->setLeft(NULL);
   tl->setRight(NULL);
   return tl;
 }
+
 TreeList* TreeList::as_TreeList(HeapWord* addr, size_t size) {
   TreeChunk* tc = (TreeChunk*) addr;
   assert(size >= sizeof(TreeChunk), "Chunk is too small for a TreeChunk");
@@ -267,6 +268,31 @@
   return retTC;
 }
 
+// Returns the block with the largest heap address amongst
+// those in the list for this size; potentially slow and expensive,
+// use with caution!
+TreeChunk* TreeList::largest_address() {
+  guarantee(head() != NULL, "The head of the list cannot be NULL");
+  FreeChunk* fc = head()->next();
+  TreeChunk* retTC;
+  if (fc == NULL) {
+    retTC = head_as_TreeChunk();
+  } else {
+    // walk down the list and return the one with the highest
+    // heap address among chunks of this size.
+    FreeChunk* last = fc;
+    while (fc->next() != NULL) {
+      if ((HeapWord*)last < (HeapWord*)fc) {
+        last = fc;
+      }
+      fc = fc->next();
+    }
+    retTC = TreeChunk::as_TreeChunk(last);
+  }
+  assert(retTC->list() == this, "Wrong type of chunk.");
+  return retTC;
+}
+
 BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr, bool splay):
   _splay(splay)
 {
@@ -379,7 +405,7 @@
             break;
           }
           // The evm code reset the hint of the candidate as
-          // at an interrim point.  Why?  Seems like this leaves
+          // at an interim point.  Why?  Seems like this leaves
           // the hint pointing to a list that didn't work.
           // curTL->set_hint(hintTL->size());
         }
@@ -436,7 +462,7 @@
   TreeList *curTL = root();
   if (curTL != NULL) {
     while(curTL->right() != NULL) curTL = curTL->right();
-    return curTL->first_available();
+    return curTL->largest_address();
   } else {
     return NULL;
   }
@@ -664,7 +690,7 @@
     }
   }
   TreeChunk* tc = TreeChunk::as_TreeChunk(fc);
-  // This chunk is being returned to the binary try.  It's embedded
+  // This chunk is being returned to the binary tree.  Its embedded
   // TreeList should be unused at this point.
   tc->initialize();
   if (curTL != NULL) {          // exact match
@@ -807,6 +833,8 @@
 }
 
 bool BinaryTreeDictionary::coalDictOverPopulated(size_t size) {
+  if (FLSAlwaysCoalesceLarge) return true;
+
   TreeList* list_of_size = findList(size);
   // None of requested size implies overpopulated.
   return list_of_size == NULL || list_of_size->coalDesired() <= 0 ||
@@ -854,17 +882,20 @@
   double _percentage;
   float _inter_sweep_current;
   float _inter_sweep_estimate;
+  float _intra_sweep_estimate;
 
  public:
   BeginSweepClosure(double p, float inter_sweep_current,
-                              float inter_sweep_estimate) :
+                              float inter_sweep_estimate,
+                              float intra_sweep_estimate) :
    _percentage(p),
    _inter_sweep_current(inter_sweep_current),
-   _inter_sweep_estimate(inter_sweep_estimate) { }
+   _inter_sweep_estimate(inter_sweep_estimate),
+   _intra_sweep_estimate(intra_sweep_estimate) { }
 
   void do_list(FreeList* fl) {
     double coalSurplusPercent = _percentage;
-    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate);
+    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
     fl->set_coalDesired((ssize_t)((double)fl->desired() * coalSurplusPercent));
     fl->set_beforeSweep(fl->count());
     fl->set_bfrSurp(fl->surplus());
@@ -939,9 +970,10 @@
 }
 
 void BinaryTreeDictionary::beginSweepDictCensus(double coalSurplusPercent,
-  float inter_sweep_current, float inter_sweep_estimate) {
+  float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
   BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current,
-                                            inter_sweep_estimate);
+                                            inter_sweep_estimate,
+                                            intra_sweep_estimate);
   bsc.do_tree(root());
 }
 
@@ -1077,13 +1109,13 @@
 // Print census information - counts, births, deaths, etc.
 // for each list in the tree.  Also print some summary
 // information.
-class printTreeCensusClosure : public AscendTreeCensusClosure {
+class PrintTreeCensusClosure : public AscendTreeCensusClosure {
   int _print_line;
   size_t _totalFree;
   FreeList _total;
 
  public:
-  printTreeCensusClosure() {
+  PrintTreeCensusClosure() {
     _print_line = 0;
     _totalFree = 0;
   }
@@ -1113,7 +1145,7 @@
 
   gclog_or_tty->print("\nBinaryTree\n");
   FreeList::print_labels_on(gclog_or_tty, "size");
-  printTreeCensusClosure ptc;
+  PrintTreeCensusClosure ptc;
   ptc.do_tree(root());
 
   FreeList* total = ptc.total();
@@ -1130,6 +1162,38 @@
              /(total->desired() != 0 ? (double)total->desired() : 1.0));
 }
 
+class PrintFreeListsClosure : public AscendTreeCensusClosure {
+  outputStream* _st;
+  int _print_line;
+
+ public:
+  PrintFreeListsClosure(outputStream* st) {
+    _st = st;
+    _print_line = 0;
+  }
+  void do_list(FreeList* fl) {
+    if (++_print_line >= 40) {
+      FreeList::print_labels_on(_st, "size");
+      _print_line = 0;
+    }
+    fl->print_on(gclog_or_tty);
+    size_t sz = fl->size();
+    for (FreeChunk* fc = fl->head(); fc != NULL;
+         fc = fc->next()) {
+      _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
+                    fc, (HeapWord*)fc + sz,
+                    fc->cantCoalesce() ? "\t CC" : "");
+    }
+  }
+};
+
+void BinaryTreeDictionary::print_free_lists(outputStream* st) const {
+
+  FreeList::print_labels_on(st, "size");
+  PrintFreeListsClosure pflc(st);
+  pflc.do_tree(root());
+}
+
 // Verify the following tree invariants:
 // . _root has no parent
 // . parent and child point to each other
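Reviewer note: the new TreeList::largest_address() above does a linear walk over the chunks of one size class and keeps the one with the highest heap address, and findLargestDict() now returns that instead of first_available() after descending to the rightmost (largest) size class. A minimal standalone sketch of that scan, using plain C++ stand-ins rather than HotSpot's FreeChunk/TreeChunk types:

#include <cassert>
#include <cstdio>

// Simplified stand-in for a singly linked free chunk of one size class.
struct Chunk {
  Chunk* next;
};

// Return the chunk with the highest address in the list headed by 'head'.
// Mirrors the intent of TreeList::largest_address(): a linear walk that
// keeps the maximum address seen, so it is O(n) in the list length.
Chunk* largest_address(Chunk* head) {
  assert(head != nullptr && "list must be non-empty");
  Chunk* best = head;
  for (Chunk* cur = head->next; cur != nullptr; cur = cur->next) {
    if (cur > best) {   // comparing raw addresses within the same block
      best = cur;
    }
  }
  return best;
}

int main() {
  Chunk c[3];
  // Build a list whose order does not match address order: c[1] -> c[2] -> c[0].
  c[1].next = &c[2]; c[2].next = &c[0]; c[0].next = nullptr;
  std::printf("largest chunk is c[%d]\n", int(largest_address(&c[1]) - c));
  return 0;
}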
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -42,9 +42,6 @@
   friend class AscendTreeCensusClosure;
   friend class DescendTreeCensusClosure;
   friend class DescendTreeSearchClosure;
-  TreeList* _parent;
-  TreeList* _left;
-  TreeList* _right;
 
  protected:
   TreeList* parent() const { return _parent; }
@@ -82,6 +79,11 @@
   // to a TreeChunk.
   TreeChunk* first_available();
 
+  // Returns the block with the largest heap address amongst
+  // those in the list for this size; potentially slow and expensive,
+  // use with caution!
+  TreeChunk* largest_address();
+
   // removeChunkReplaceIfNeeded() removes the given "tc" from the TreeList.
   // If "tc" is the first chunk in the list, it is also the
   // TreeList that is the node in the tree.  removeChunkReplaceIfNeeded()
@@ -254,8 +256,9 @@
   // Methods called at the beginning of a sweep to prepare the
   // statistics for the sweep.
   void       beginSweepDictCensus(double coalSurplusPercent,
-                                  float sweep_current,
-                                  float sweep_estimate);
+                                  float inter_sweep_current,
+                                  float inter_sweep_estimate,
+                                  float intra_sweep_estimate);
   // Methods called after the end of a sweep to modify the
   // statistics for the sweep.
   void       endSweepDictCensus(double splitSurplusPercent);
@@ -269,6 +272,7 @@
   // Print the statistcis for all the lists in the tree.  Also may
   // print out summaries.
   void       printDictCensus(void) const;
+  void       print_free_lists(outputStream* st) const;
 
   // For debugging.  Returns the sum of the _returnedBytes for
   // all lists in the tree.
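Reviewer note: the renamed PrintTreeCensusClosure and the new PrintFreeListsClosure in binaryTreeDictionary.cpp both follow the AscendTreeCensusClosure pattern: a closure whose do_list() is applied to every per-size free list while do_tree() walks the tree, presumably in ascending size order. A rough standalone sketch of that visitor shape (simplified node type, plain C++; not the HotSpot classes):

#include <cstddef>
#include <cstdio>

// Simplified binary-tree node keyed by chunk size; each node stands in
// for one per-size free list (a stand-in for TreeList).
struct Node {
  size_t size;
  Node*  left;
  Node*  right;
};

// Closure applied to every node, smallest size first (in-order traversal),
// in the spirit of AscendTreeCensusClosure and its do_tree() driver.
struct AscendClosure {
  virtual void do_list(Node* n) = 0;
  void do_tree(Node* n) {
    if (n == nullptr) return;
    do_tree(n->left);
    do_list(n);
    do_tree(n->right);
  }
  virtual ~AscendClosure() {}
};

// Example subclass, the analogue of PrintFreeListsClosure.
struct PrintClosure : public AscendClosure {
  void do_list(Node* n) override { std::printf("size %zu\n", n->size); }
};

int main() {
  Node a{8, nullptr, nullptr}, c{32, nullptr, nullptr}, b{16, &a, &c};
  PrintClosure pc;
  pc.do_tree(&b);   // prints 8, 16, 32
  return 0;
}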
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -32,7 +32,9 @@
 // threads. The second argument is in support of an extra locking
 // check for CFL spaces' free list locks.
 #ifndef PRODUCT
-void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) {
+void CMSLockVerifier::assert_locked(const Mutex* lock,
+                                    const Mutex* p_lock1,
+                                    const Mutex* p_lock2) {
   if (!Universe::is_fully_initialized()) {
     return;
   }
@@ -40,7 +42,7 @@
   Thread* myThread = Thread::current();
 
   if (lock == NULL) { // a "lock-free" structure, e.g. MUT, protected by CMS token
-    assert(p_lock == NULL, "Unexpected state");
+    assert(p_lock1 == NULL && p_lock2 == NULL, "Unexpected caller error");
     if (myThread->is_ConcurrentGC_thread()) {
       // This test might have to change in the future, if there can be
       // multiple peer CMS threads.  But for now, if we're testing the CMS
@@ -60,36 +62,39 @@
     return;
   }
 
-  if (ParallelGCThreads == 0) {
+  if (myThread->is_VM_thread()
+      || myThread->is_ConcurrentGC_thread()
+      || myThread->is_Java_thread()) {
+    // Make sure that we are holding the associated lock.
     assert_lock_strong(lock);
+    // The checking of p_lock is a spl case for CFLS' free list
+    // locks: we make sure that none of the parallel GC work gang
+    // threads are holding "sub-locks" of freeListLock(). We check only
+    // the parDictionaryAllocLock because the others are too numerous.
+    // This spl case code is somewhat ugly and any improvements
+    // are welcome.
+    assert(p_lock1 == NULL || !p_lock1->is_locked() || p_lock1->owned_by_self(),
+           "Possible race between this and parallel GC threads");
+    assert(p_lock2 == NULL || !p_lock2->is_locked() || p_lock2->owned_by_self(),
+           "Possible race between this and parallel GC threads");
+  } else if (myThread->is_GC_task_thread()) {
+    // Make sure that the VM or CMS thread holds lock on our behalf
+    // XXX If there were a concept of a gang_master for a (set of)
+    // gang_workers, we could have used the identity of that thread
+    // for checking ownership here; for now we just disjunct.
+    assert(lock->owner() == VMThread::vm_thread() ||
+           lock->owner() == ConcurrentMarkSweepThread::cmst(),
+           "Should be locked by VM thread or CMS thread on my behalf");
+    if (p_lock1 != NULL) {
+      assert_lock_strong(p_lock1);
+    }
+    if (p_lock2 != NULL) {
+      assert_lock_strong(p_lock2);
+    }
   } else {
-    if (myThread->is_VM_thread()
-        || myThread->is_ConcurrentGC_thread()
-        || myThread->is_Java_thread()) {
-      // Make sure that we are holding the associated lock.
-      assert_lock_strong(lock);
-      // The checking of p_lock is a spl case for CFLS' free list
-      // locks: we make sure that none of the parallel GC work gang
-      // threads are holding "sub-locks" of freeListLock(). We check only
-      // the parDictionaryAllocLock because the others are too numerous.
-      // This spl case code is somewhat ugly and any improvements
-      // are welcome XXX FIX ME!!
-      if (p_lock != NULL) {
-        assert(!p_lock->is_locked() || p_lock->owned_by_self(),
-               "Possible race between this and parallel GC threads");
-      }
-    } else if (myThread->is_GC_task_thread()) {
-      // Make sure that the VM or CMS thread holds lock on our behalf
-      // XXX If there were a concept of a gang_master for a (set of)
-      // gang_workers, we could have used the identity of that thread
-      // for checking ownership here; for now we just disjunct.
-      assert(lock->owner() == VMThread::vm_thread() ||
-             lock->owner() == ConcurrentMarkSweepThread::cmst(),
-             "Should be locked by VM thread or CMS thread on my behalf");
-    } else {
-      // Make sure we didn't miss some obscure corner case
-      ShouldNotReachHere();
-    }
+    // Make sure we didn't miss some other thread type calling into here;
+    // perhaps as a result of future VM evolution.
+    ShouldNotReachHere();
   }
 }
 #endif
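Reviewer note: the restructured assert_locked() above drops the old ParallelGCThreads == 0 special case and keys the check purely on the caller's thread type: a VM, CMS, or Java thread must hold the lock itself (and must not be racing on the optional peer locks), while a GC worker thread only needs the lock to be held on its behalf by the VM or CMS thread. A compressed standalone sketch of that decision table (plain C++ with booleans standing in for Mutex state; not the HotSpot verifier):

#include <cassert>
#include <cstddef>

enum class ThreadKind { VMOrCMSOrJava, GCWorker, Other };

struct LockState {
  bool locked;          // somebody holds it
  bool owned_by_self;   // the current thread holds it
};

// Shape of CMSLockVerifier::assert_locked() after this change. The peer
// locks p1/p2 may be absent; when present they must either be unlocked or
// held by the current thread. 'lock_owner_is_vm_or_cms' is a simplification
// of the real owner() checks against VMThread/ConcurrentMarkSweepThread.
void verify_locked(ThreadKind me, const LockState& lock,
                   const LockState* p1, const LockState* p2,
                   bool lock_owner_is_vm_or_cms) {
  switch (me) {
    case ThreadKind::VMOrCMSOrJava:
      assert(lock.owned_by_self);                           // assert_lock_strong(lock)
      assert(p1 == nullptr || !p1->locked || p1->owned_by_self);
      assert(p2 == nullptr || !p2->locked || p2->owned_by_self);
      break;
    case ThreadKind::GCWorker:
      assert(lock_owner_is_vm_or_cms);                      // held on my behalf
      if (p1 != nullptr) assert(p1->owned_by_self);
      if (p2 != nullptr) assert(p2->owned_by_self);
      break;
    default:
      assert(false && "unexpected thread type");            // ShouldNotReachHere()
  }
}

int main() {
  LockState main_lock{true, true};
  LockState peer{false, false};
  verify_locked(ThreadKind::VMOrCMSOrJava, main_lock, &peer, nullptr, false);
  return 0;
}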
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -29,8 +29,11 @@
 // the parallel threads.
 class CMSLockVerifier: AllStatic {
  public:
-  static void assert_locked(const Mutex* lock, const Mutex* p_lock)
+  static void assert_locked(const Mutex* lock, const Mutex* p_lock1, const Mutex* p_lock2)
     PRODUCT_RETURN;
+  static void assert_locked(const Mutex* lock, const Mutex* p_lock) {
+    assert_locked(lock, p_lock, NULL);
+  }
   static void assert_locked(const Mutex* lock) {
     assert_locked(lock, NULL);
   }
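Reviewer note: the header keeps existing call sites working by overload forwarding: the old two-argument assert_locked(lock, p_lock) is now an inline wrapper onto the new three-argument form with a NULL second peer lock, and the one-argument form forwards to the two-argument one. The same idiom in miniature (plain C++ sketch, hypothetical names):

#include <cstddef>

struct Mutex;  // opaque here

// Hypothetical example of the same overload-forwarding idiom.
void check(const Mutex* lock, const Mutex* p1, const Mutex* p2) {
  // ... the real verification work would go here ...
  (void)lock; (void)p1; (void)p2;
}
inline void check(const Mutex* lock, const Mutex* p1) { check(lock, p1, NULL); }
inline void check(const Mutex* lock)                  { check(lock, NULL); }

int main() { check(NULL); return 0; }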
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -62,18 +62,15 @@
   // implementation, namely, the simple binary tree (splaying
   // temporarily disabled).
   switch (dictionaryChoice) {
-    case FreeBlockDictionary::dictionaryBinaryTree:
-      _dictionary = new BinaryTreeDictionary(mr);
-      break;
     case FreeBlockDictionary::dictionarySplayTree:
     case FreeBlockDictionary::dictionarySkipList:
     default:
       warning("dictionaryChoice: selected option not understood; using"
               " default BinaryTreeDictionary implementation instead.");
+    case FreeBlockDictionary::dictionaryBinaryTree:
       _dictionary = new BinaryTreeDictionary(mr);
       break;
   }
-  splitBirth(mr.word_size());
   assert(_dictionary != NULL, "CMS dictionary initialization");
   // The indexed free lists are initially all empty and are lazily
   // filled in on demand. Initialize the array elements to NULL.
@@ -388,6 +385,105 @@
   return res;
 }
 
+void CompactibleFreeListSpace::print_indexed_free_lists(outputStream* st)
+const {
+  reportIndexedFreeListStatistics();
+  gclog_or_tty->print_cr("Layout of Indexed Freelists");
+  gclog_or_tty->print_cr("---------------------------");
+  FreeList::print_labels_on(st, "size");
+  for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
+    _indexedFreeList[i].print_on(gclog_or_tty);
+    for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
+         fc = fc->next()) {
+      gclog_or_tty->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
+                          fc, (HeapWord*)fc + i,
+                          fc->cantCoalesce() ? "\t CC" : "");
+    }
+  }
+}
+
+void CompactibleFreeListSpace::print_promo_info_blocks(outputStream* st)
+const {
+  _promoInfo.print_on(st);
+}
+
+void CompactibleFreeListSpace::print_dictionary_free_lists(outputStream* st)
+const {
+  _dictionary->reportStatistics();
+  st->print_cr("Layout of Freelists in Tree");
+  st->print_cr("---------------------------");
+  _dictionary->print_free_lists(st);
+}
+
+class BlkPrintingClosure: public BlkClosure {
+  const CMSCollector*             _collector;
+  const CompactibleFreeListSpace* _sp;
+  const CMSBitMap*                _live_bit_map;
+  const bool                      _post_remark;
+  outputStream*                   _st;
+public:
+  BlkPrintingClosure(const CMSCollector* collector,
+                     const CompactibleFreeListSpace* sp,
+                     const CMSBitMap* live_bit_map,
+                     outputStream* st):
+    _collector(collector),
+    _sp(sp),
+    _live_bit_map(live_bit_map),
+    _post_remark(collector->abstract_state() > CMSCollector::FinalMarking),
+    _st(st) { }
+  size_t do_blk(HeapWord* addr);
+};
+
+size_t BlkPrintingClosure::do_blk(HeapWord* addr) {
+  size_t sz = _sp->block_size_no_stall(addr, _collector);
+  assert(sz != 0, "Should always be able to compute a size");
+  if (_sp->block_is_obj(addr)) {
+    const bool dead = _post_remark && !_live_bit_map->isMarked(addr);
+    _st->print_cr(PTR_FORMAT ": %s object of size " SIZE_FORMAT "%s",
+      addr,
+      dead ? "dead" : "live",
+      sz,
+      (!dead && CMSPrintObjectsInDump) ? ":" : ".");
+    if (CMSPrintObjectsInDump && !dead) {
+      oop(addr)->print_on(_st);
+      _st->print_cr("--------------------------------------");
+    }
+  } else { // free block
+    _st->print_cr(PTR_FORMAT ": free block of size " SIZE_FORMAT "%s",
+      addr, sz, CMSPrintChunksInDump ? ":" : ".");
+    if (CMSPrintChunksInDump) {
+      ((FreeChunk*)addr)->print_on(_st);
+      _st->print_cr("--------------------------------------");
+    }
+  }
+  return sz;
+}
+
+void CompactibleFreeListSpace::dump_at_safepoint_with_locks(CMSCollector* c,
+  outputStream* st) {
+  st->print_cr("\n=========================");
+  st->print_cr("Block layout in CMS Heap:");
+  st->print_cr("=========================");
+  BlkPrintingClosure  bpcl(c, this, c->markBitMap(), st);
+  blk_iterate(&bpcl);
+
+  st->print_cr("\n=======================================");
+  st->print_cr("Order & Layout of Promotion Info Blocks");
+  st->print_cr("=======================================");
+  print_promo_info_blocks(st);
+
+  st->print_cr("\n===========================");
+  st->print_cr("Order of Indexed Free Lists");
+  st->print_cr("=========================");
+  print_indexed_free_lists(st);
+
+  st->print_cr("\n=================================");
+  st->print_cr("Order of Free Lists in Dictionary");
+  st->print_cr("=================================");
+  print_dictionary_free_lists(st);
+}
+
+
 void CompactibleFreeListSpace::reportFreeListStatistics() const {
   assert_lock_strong(&_freelistLock);
   assert(PrintFLSStatistics != 0, "Reporting error");
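Reviewer note: dump_at_safepoint_with_locks() drives BlkPrintingClosure via blk_iterate(), i.e. it walks the space block by block, asks for each block's size, and classifies it as a live object, a dead object (only distinguishable after remark, via the live bit map), or a free chunk; the iterator then advances by the size do_blk() returned. A standalone sketch of that walk over a toy block layout (plain C++; sizes in words, not HotSpot types):

#include <cstdio>
#include <vector>

// Toy heap: each entry is one block, parallel to what do_blk() sees.
struct Block { size_t words; bool is_obj; bool live; };

// Walk the space block by block, the way blk_iterate() drives
// BlkPrintingClosure::do_blk(): classify each block and advance the
// cursor by the size the closure reports.
void dump_blocks(const std::vector<Block>& space, bool post_remark) {
  size_t addr = 0;                          // word offset, standing in for HeapWord*
  for (const Block& b : space) {
    if (b.is_obj) {
      bool dead = post_remark && !b.live;   // dead only detectable after remark
      std::printf("%6zu: %s object of size %zu\n",
                  addr, dead ? "dead" : "live", b.words);
    } else {
      std::printf("%6zu: free block of size %zu\n", addr, b.words);
    }
    addr += b.words;                        // do_blk() returns sz; iterator adds it
  }
}

int main() {
  dump_blocks({{24, true, true}, {8, false, false}, {16, true, false}}, true);
  return 0;
}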
@@ -449,37 +545,37 @@
   if (prevEnd != NULL) {
     // Resize the underlying block offset table.
     _bt.resize(pointer_delta(value, bottom()));
-  if (value <= prevEnd) {
-    assert(value >= unallocated_block(), "New end is below unallocated block");
-  } else {
-    // Now, take this new chunk and add it to the free blocks.
-    // Note that the BOT has not yet been updated for this block.
-    size_t newFcSize = pointer_delta(value, prevEnd);
-    // XXX This is REALLY UGLY and should be fixed up. XXX
-    if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
-      // Mark the boundary of the new block in BOT
-      _bt.mark_block(prevEnd, value);
-      // put it all in the linAB
-      if (ParallelGCThreads == 0) {
-        _smallLinearAllocBlock._ptr = prevEnd;
-        _smallLinearAllocBlock._word_size = newFcSize;
-        repairLinearAllocBlock(&_smallLinearAllocBlock);
-      } else { // ParallelGCThreads > 0
-        MutexLockerEx x(parDictionaryAllocLock(),
-                        Mutex::_no_safepoint_check_flag);
-        _smallLinearAllocBlock._ptr = prevEnd;
-        _smallLinearAllocBlock._word_size = newFcSize;
-        repairLinearAllocBlock(&_smallLinearAllocBlock);
+    if (value <= prevEnd) {
+      assert(value >= unallocated_block(), "New end is below unallocated block");
+    } else {
+      // Now, take this new chunk and add it to the free blocks.
+      // Note that the BOT has not yet been updated for this block.
+      size_t newFcSize = pointer_delta(value, prevEnd);
+      // XXX This is REALLY UGLY and should be fixed up. XXX
+      if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
+        // Mark the boundary of the new block in BOT
+        _bt.mark_block(prevEnd, value);
+        // put it all in the linAB
+        if (ParallelGCThreads == 0) {
+          _smallLinearAllocBlock._ptr = prevEnd;
+          _smallLinearAllocBlock._word_size = newFcSize;
+          repairLinearAllocBlock(&_smallLinearAllocBlock);
+        } else { // ParallelGCThreads > 0
+          MutexLockerEx x(parDictionaryAllocLock(),
+                          Mutex::_no_safepoint_check_flag);
+          _smallLinearAllocBlock._ptr = prevEnd;
+          _smallLinearAllocBlock._word_size = newFcSize;
+          repairLinearAllocBlock(&_smallLinearAllocBlock);
+        }
+        // Births of chunks put into a LinAB are not recorded.  Births
+        // of chunks as they are allocated out of a LinAB are.
+      } else {
+        // Add the block to the free lists, if possible coalescing it
+        // with the last free block, and update the BOT and census data.
+        addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
       }
-      // Births of chunks put into a LinAB are not recorded.  Births
-      // of chunks as they are allocated out of a LinAB are.
-    } else {
-      // Add the block to the free lists, if possible coalescing it
-      // with the last free block, and update the BOT and census data.
-      addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
     }
   }
-  }
 }
 
 class FreeListSpace_DCTOC : public Filtering_DCTOC {
@@ -732,7 +828,7 @@
 
 void CompactibleFreeListSpace::object_iterate_mem(MemRegion mr,
                                                   UpwardsObjectClosure* cl) {
-  assert_locked();
+  assert_locked(freelistLock());
   NOT_PRODUCT(verify_objects_initialized());
   Space::object_iterate_mem(mr, cl);
 }
@@ -1212,12 +1308,15 @@
 void CompactibleFreeListSpace::assert_locked() const {
   CMSLockVerifier::assert_locked(freelistLock(), parDictionaryAllocLock());
 }
+
+void CompactibleFreeListSpace::assert_locked(const Mutex* lock) const {
+  CMSLockVerifier::assert_locked(lock);
+}
 #endif
 
 FreeChunk* CompactibleFreeListSpace::allocateScratch(size_t size) {
   // In the parallel case, the main thread holds the free list lock
   // on behalf the parallel threads.
-  assert_locked();
   FreeChunk* fc;
   {
     // If GC is parallel, this might be called by several threads.
@@ -1298,17 +1397,18 @@
     res = blk->_ptr;
     _bt.allocated(res, blk->_word_size);
   } else if (size + MinChunkSize <= blk->_refillSize) {
+    size_t sz = blk->_word_size;
     // Update _unallocated_block if the size is such that chunk would be
     // returned to the indexed free list.  All other chunks in the indexed
     // free lists are allocated from the dictionary so that _unallocated_block
     // has already been adjusted for them.  Do it here so that the cost
     // for all chunks added back to the indexed free lists.
-    if (blk->_word_size < SmallForDictionary) {
-      _bt.allocated(blk->_ptr, blk->_word_size);
+    if (sz < SmallForDictionary) {
+      _bt.allocated(blk->_ptr, sz);
     }
     // Return the chunk that isn't big enough, and then refill below.
-    addChunkToFreeLists(blk->_ptr, blk->_word_size);
-    _bt.verify_single_block(blk->_ptr, (blk->_ptr + blk->_word_size));
+    addChunkToFreeLists(blk->_ptr, sz);
+    splitBirth(sz);
     // Don't keep statistics on adding back chunk from a LinAB.
   } else {
     // A refilled block would not satisfy the request.
@@ -1376,11 +1476,13 @@
     res = getChunkFromIndexedFreeListHelper(size);
   }
   _bt.verify_not_unallocated((HeapWord*) res, size);
+  assert(res == NULL || res->size() == size, "Incorrect block size");
   return res;
 }
 
 FreeChunk*
-CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) {
+CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size,
+  bool replenish) {
   assert_locked();
   FreeChunk* fc = NULL;
   if (size < SmallForDictionary) {
@@ -1398,54 +1500,66 @@
       // and replenishing indexed lists from the small linAB.
       //
       FreeChunk* newFc = NULL;
-      size_t replenish_size = CMSIndexedFreeListReplenish * size;
+      const size_t replenish_size = CMSIndexedFreeListReplenish * size;
       if (replenish_size < SmallForDictionary) {
         // Do not replenish from an underpopulated size.
         if (_indexedFreeList[replenish_size].surplus() > 0 &&
             _indexedFreeList[replenish_size].head() != NULL) {
-          newFc =
-            _indexedFreeList[replenish_size].getChunkAtHead();
-        } else {
+          newFc = _indexedFreeList[replenish_size].getChunkAtHead();
+        } else if (bestFitFirst()) {
           newFc = bestFitSmall(replenish_size);
         }
       }
-      if (newFc != NULL) {
-        splitDeath(replenish_size);
-      } else if (replenish_size > size) {
+      if (newFc == NULL && replenish_size > size) {
         assert(CMSIndexedFreeListReplenish > 1, "ctl pt invariant");
-        newFc =
-          getChunkFromIndexedFreeListHelper(replenish_size);
+        newFc = getChunkFromIndexedFreeListHelper(replenish_size, false);
       }
+      // Note: The stats update re split-death of block obtained above
+      // will be recorded below precisely when we know we are going to
+      // be actually splitting it into more than one pieces below.
       if (newFc != NULL) {
-        assert(newFc->size() == replenish_size, "Got wrong size");
-        size_t i;
-        FreeChunk *curFc, *nextFc;
-        // carve up and link blocks 0, ..., CMSIndexedFreeListReplenish - 2
-        // The last chunk is not added to the lists but is returned as the
-        // free chunk.
-        for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
-             i = 0;
-             i < (CMSIndexedFreeListReplenish - 1);
-             curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
-             i++) {
+        if  (replenish || CMSReplenishIntermediate) {
+          // Replenish this list and return one block to caller.
+          size_t i;
+          FreeChunk *curFc, *nextFc;
+          size_t num_blk = newFc->size() / size;
+          assert(num_blk >= 1, "Smaller than requested?");
+          assert(newFc->size() % size == 0, "Should be integral multiple of request");
+          if (num_blk > 1) {
+            // we are sure we will be splitting the block just obtained
+            // into multiple pieces; record the split-death of the original
+            splitDeath(replenish_size);
+          }
+          // carve up and link blocks 0, ..., num_blk - 2
+          // The last chunk is not added to the lists but is returned as the
+          // free chunk.
+          for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
+               i = 0;
+               i < (num_blk - 1);
+               curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
+               i++) {
+            curFc->setSize(size);
+            // Don't record this as a return in order to try and
+            // determine the "returns" from a GC.
+            _bt.verify_not_unallocated((HeapWord*) fc, size);
+            _indexedFreeList[size].returnChunkAtTail(curFc, false);
+            _bt.mark_block((HeapWord*)curFc, size);
+            splitBirth(size);
+            // Don't record the initial population of the indexed list
+            // as a split birth.
+          }
+
+          // check that the arithmetic was OK above
+          assert((HeapWord*)nextFc == (HeapWord*)newFc + num_blk*size,
+            "inconsistency in carving newFc");
           curFc->setSize(size);
-          // Don't record this as a return in order to try and
-          // determine the "returns" from a GC.
-          _bt.verify_not_unallocated((HeapWord*) fc, size);
-          _indexedFreeList[size].returnChunkAtTail(curFc, false);
           _bt.mark_block((HeapWord*)curFc, size);
           splitBirth(size);
-          // Don't record the initial population of the indexed list
-          // as a split birth.
+          fc = curFc;
+        } else {
+          // Return entire block to caller
+          fc = newFc;
         }
-
-        // check that the arithmetic was OK above
-        assert((HeapWord*)nextFc == (HeapWord*)newFc + replenish_size,
-          "inconsistency in carving newFc");
-        curFc->setSize(size);
-        _bt.mark_block((HeapWord*)curFc, size);
-        splitBirth(size);
-        return curFc;
       }
     }
   } else {
@@ -1453,7 +1567,7 @@
     // replenish the indexed free list.
     fc = getChunkFromDictionaryExact(size);
   }
-  assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
+  // assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
   return fc;
 }
 
@@ -1512,6 +1626,11 @@
   // adjust _unallocated_block downward, as necessary
   _bt.freed((HeapWord*)chunk, size);
   _dictionary->returnChunk(chunk);
+#ifndef PRODUCT
+  if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
+    TreeChunk::as_TreeChunk(chunk)->list()->verify_stats();
+  }
+#endif // PRODUCT
 }
 
 void
@@ -1525,6 +1644,11 @@
   } else {
     _indexedFreeList[size].returnChunkAtHead(fc);
   }
+#ifndef PRODUCT
+  if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
+     _indexedFreeList[size].verify_stats();
+  }
+#endif // PRODUCT
 }
 
 // Add chunk to end of last block -- if it's the largest
@@ -1537,7 +1661,6 @@
   HeapWord* chunk, size_t     size) {
   // check that the chunk does lie in this space!
   assert(chunk != NULL && is_in_reserved(chunk), "Not in this space!");
-  assert_locked();
   // One of the parallel gc task threads may be here
   // whilst others are allocating.
   Mutex* lock = NULL;
@@ -1991,24 +2114,26 @@
   return frag;
 }
 
-#define CoalSurplusPercent 1.05
-#define SplitSurplusPercent 1.10
-
 void CompactibleFreeListSpace::beginSweepFLCensus(
   float inter_sweep_current,
-  float inter_sweep_estimate) {
+  float inter_sweep_estimate,
+  float intra_sweep_estimate) {
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     FreeList* fl    = &_indexedFreeList[i];
-    fl->compute_desired(inter_sweep_current, inter_sweep_estimate);
-    fl->set_coalDesired((ssize_t)((double)fl->desired() * CoalSurplusPercent));
+    if (PrintFLSStatistics > 1) {
+      gclog_or_tty->print("size[%d] : ", i);
+    }
+    fl->compute_desired(inter_sweep_current, inter_sweep_estimate, intra_sweep_estimate);
+    fl->set_coalDesired((ssize_t)((double)fl->desired() * CMSSmallCoalSurplusPercent));
     fl->set_beforeSweep(fl->count());
     fl->set_bfrSurp(fl->surplus());
   }
-  _dictionary->beginSweepDictCensus(CoalSurplusPercent,
+  _dictionary->beginSweepDictCensus(CMSLargeCoalSurplusPercent,
                                     inter_sweep_current,
-                                    inter_sweep_estimate);
+                                    inter_sweep_estimate,
+                                    intra_sweep_estimate);
 }
 
 void CompactibleFreeListSpace::setFLSurplus() {
@@ -2017,7 +2142,7 @@
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     FreeList *fl = &_indexedFreeList[i];
     fl->set_surplus(fl->count() -
-                    (ssize_t)((double)fl->desired() * SplitSurplusPercent));
+                    (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
   }
 }
 
@@ -2048,6 +2173,11 @@
 }
 
 void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) {
+  if (PrintFLSStatistics > 0) {
+    HeapWord* largestAddr = (HeapWord*) dictionary()->findLargestDict();
+    gclog_or_tty->print_cr("CMS: Large block " PTR_FORMAT,
+                           largestAddr);
+  }
   setFLSurplus();
   setFLHints();
   if (PrintGC && PrintFLSCensus > 0) {
@@ -2055,7 +2185,7 @@
   }
   clearFLCensus();
   assert_locked();
-  _dictionary->endSweepDictCensus(SplitSurplusPercent);
+  _dictionary->endSweepDictCensus(CMSLargeSplitSurplusPercent);
 }
 
 bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
@@ -2312,13 +2442,18 @@
 }
 
 void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
-  FreeChunk* fc =  _indexedFreeList[size].head();
+  FreeChunk* fc   =  _indexedFreeList[size].head();
+  FreeChunk* tail =  _indexedFreeList[size].tail();
+  size_t    num = _indexedFreeList[size].count();
+  size_t      n = 0;
   guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
-  for (; fc != NULL; fc = fc->next()) {
+  for (; fc != NULL; fc = fc->next(), n++) {
     guarantee(fc->size() == size, "Size inconsistency");
     guarantee(fc->isFree(), "!free?");
     guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
+    guarantee((fc->next() == NULL) == (fc == tail), "Incorrect tail");
   }
+  guarantee(n == num, "Incorrect count");
 }
 
 #ifndef PRODUCT
@@ -2516,11 +2651,41 @@
   _tracking = true;
 }
 
-void PromotionInfo::stopTrackingPromotions() {
+#define CMSPrintPromoBlockInfo 1
+
+void PromotionInfo::stopTrackingPromotions(uint worker_id) {
   assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
          "spooling inconsistency?");
   _firstIndex = _nextIndex = 1;
   _tracking = false;
+  if (CMSPrintPromoBlockInfo > 1) {
+    print_statistics(worker_id);
+  }
+}
+
+void PromotionInfo::print_statistics(uint worker_id) const {
+  assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
+         "Else will undercount");
+  assert(CMSPrintPromoBlockInfo > 0, "Else unnecessary call");
+  // Count the number of blocks and slots in the free pool
+  size_t slots  = 0;
+  size_t blocks = 0;
+  for (SpoolBlock* cur_spool = _spareSpool;
+       cur_spool != NULL;
+       cur_spool = cur_spool->nextSpoolBlock) {
+    // the first entry is just a self-pointer; indices 1 through
+    // bufferSize - 1 are occupied (thus, bufferSize - 1 slots).
+    guarantee((void*)cur_spool->displacedHdr == (void*)&cur_spool->displacedHdr,
+              "first entry of displacedHdr should be self-referential");
+    slots += cur_spool->bufferSize - 1;
+    blocks++;
+  }
+  if (_spoolHead != NULL) {
+    slots += _spoolHead->bufferSize - 1;
+    blocks++;
+  }
+  gclog_or_tty->print_cr(" [worker %d] promo_blocks = %d, promo_slots = %d ",
+                         worker_id, blocks, slots);
 }
 
 // When _spoolTail is not NULL, then the slot <_spoolTail, _nextIndex>
@@ -2584,15 +2749,84 @@
   guarantee(numDisplacedHdrs == numObjsWithDisplacedHdrs, "Displaced hdr count");
 }
 
+void PromotionInfo::print_on(outputStream* st) const {
+  SpoolBlock* curSpool = NULL;
+  size_t i = 0;
+  st->print_cr("start & end indices: [" SIZE_FORMAT ", " SIZE_FORMAT ")",
+               _firstIndex, _nextIndex);
+  for (curSpool = _spoolHead; curSpool != _spoolTail && curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" active ");
+    i++;
+  }
+  for (curSpool = _spoolTail; curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" inactive ");
+    i++;
+  }
+  for (curSpool = _spareSpool; curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" free ");
+    i++;
+  }
+  st->print_cr(SIZE_FORMAT " header spooling blocks", i);
+}
+
+void SpoolBlock::print_on(outputStream* st) const {
+  st->print("[" PTR_FORMAT "," PTR_FORMAT "), " SIZE_FORMAT " HeapWords -> " PTR_FORMAT,
+            this, (HeapWord*)displacedHdr + bufferSize,
+            bufferSize, nextSpoolBlock);
+}
+
+///////////////////////////////////////////////////////////////////////////
+// CFLS_LAB
+///////////////////////////////////////////////////////////////////////////
+
+#define VECTOR_257(x)                                                                                  \
+  /* 1  2  3  4  5  6  7  8  9 1x 11 12 13 14 15 16 17 18 19 2x 21 22 23 24 25 26 27 28 29 3x 31 32 */ \
+  {  x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x }
+
+// Initialize with default setting of CMSParPromoteBlocksToClaim, _not_
+// OldPLABSize, whose static default is different; if overridden at the
+// command-line, this will get reinitialized via a call to
+// modify_initialization() below.
+AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[]    =
+  VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim));
+size_t CFLS_LAB::_global_num_blocks[]  = VECTOR_257(0);
+int    CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
 
 CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) :
   _cfls(cfls)
 {
-  _blocks_to_claim = CMSParPromoteBlocksToClaim;
+  assert(CompactibleFreeListSpace::IndexSetSize == 257, "Modify VECTOR_257() macro above");
   for (size_t i = CompactibleFreeListSpace::IndexSetStart;
        i < CompactibleFreeListSpace::IndexSetSize;
        i += CompactibleFreeListSpace::IndexSetStride) {
     _indexedFreeList[i].set_size(i);
+    _num_blocks[i] = 0;
+  }
+}
+
+static bool _CFLS_LAB_modified = false;
+
+void CFLS_LAB::modify_initialization(size_t n, unsigned wt) {
+  assert(!_CFLS_LAB_modified, "Call only once");
+  _CFLS_LAB_modified = true;
+  for (size_t i = CompactibleFreeListSpace::IndexSetStart;
+       i < CompactibleFreeListSpace::IndexSetSize;
+       i += CompactibleFreeListSpace::IndexSetStride) {
+    _blocks_to_claim[i].modify(n, wt, true /* force */);
   }
 }
 
@@ -2607,11 +2841,9 @@
     if (res == NULL) return NULL;
   } else {
     FreeList* fl = &_indexedFreeList[word_sz];
-    bool filled = false; //TRAP
     if (fl->count() == 0) {
-      bool filled = true; //TRAP
       // Attempt to refill this local free list.
-      _cfls->par_get_chunk_of_blocks(word_sz, _blocks_to_claim, fl);
+      get_from_global_pool(word_sz, fl);
       // If it didn't work, give up.
       if (fl->count() == 0) return NULL;
     }
@@ -2626,80 +2858,190 @@
   return (HeapWord*)res;
 }
 
-void CFLS_LAB::retire() {
-  for (size_t i = CompactibleFreeListSpace::IndexSetStart;
+// Get a chunk of blocks of the right size and update related
+// book-keeping stats
+void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList* fl) {
+  // Get the #blocks we want to claim
+  size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
+  assert(n_blks > 0, "Error");
+  assert(ResizePLAB || n_blks == OldPLABSize, "Error");
+  // In some cases, when the application has a phase change,
+  // there may be a sudden and sharp shift in the object survival
+  // profile, and updating the counts at the end of a scavenge
+  // may not be quick enough, giving rise to large scavenge pauses
+  // during these phase changes. It is beneficial to detect such
+  // changes on-the-fly during a scavenge and avoid such a phase-change
+  // pothole. The following code is a heuristic attempt to do that.
+  // It is protected by a product flag until we have gained
+  // enough experience with this heuristic and fine-tuned its behaviour.
+  // WARNING: This might increase fragmentation if we overreact to
+  // small spikes, so some kind of historical smoothing based on
+  // previous experience with the greater reactivity might be useful.
+  // Lacking sufficient experience, CMSOldPLABResizeQuicker is disabled by
+  // default.
+  if (ResizeOldPLAB && CMSOldPLABResizeQuicker) {
+    size_t multiple = _num_blocks[word_sz]/(CMSOldPLABToleranceFactor*CMSOldPLABNumRefills*n_blks);
+    n_blks +=  CMSOldPLABReactivityFactor*multiple*n_blks;
+    n_blks = MIN2(n_blks, CMSOldPLABMax);
+  }
+  assert(n_blks > 0, "Error");
+  _cfls->par_get_chunk_of_blocks(word_sz, n_blks, fl);
+  // Update stats table entry for this block size
+  _num_blocks[word_sz] += fl->count();
+}
+
+void CFLS_LAB::compute_desired_plab_size() {
+  for (size_t i =  CompactibleFreeListSpace::IndexSetStart;
        i < CompactibleFreeListSpace::IndexSetSize;
        i += CompactibleFreeListSpace::IndexSetStride) {
-    if (_indexedFreeList[i].count() > 0) {
-      MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
-                      Mutex::_no_safepoint_check_flag);
-      _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
-      // Reset this list.
-      _indexedFreeList[i] = FreeList();
-      _indexedFreeList[i].set_size(i);
+    assert((_global_num_workers[i] == 0) == (_global_num_blocks[i] == 0),
+           "Counter inconsistency");
+    if (_global_num_workers[i] > 0) {
+      // Need to smooth wrt historical average
+      if (ResizeOldPLAB) {
+        _blocks_to_claim[i].sample(
+          MAX2((size_t)CMSOldPLABMin,
+          MIN2((size_t)CMSOldPLABMax,
+               _global_num_blocks[i]/(_global_num_workers[i]*CMSOldPLABNumRefills))));
+      }
+      // Reset counters for next round
+      _global_num_workers[i] = 0;
+      _global_num_blocks[i] = 0;
+      if (PrintOldPLAB) {
+        gclog_or_tty->print_cr("[%d]: %d", i, (size_t)_blocks_to_claim[i].average());
+      }
     }
   }
 }
 
-void
-CompactibleFreeListSpace::
-par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
+void CFLS_LAB::retire(int tid) {
+  // We run this single threaded with the world stopped;
+  // so no need for locks and such.
+#define CFLS_LAB_PARALLEL_ACCESS 0
+  NOT_PRODUCT(Thread* t = Thread::current();)
+  assert(Thread::current()->is_VM_thread(), "Error");
+  assert(CompactibleFreeListSpace::IndexSetStart == CompactibleFreeListSpace::IndexSetStride,
+         "Will access to uninitialized slot below");
+#if CFLS_LAB_PARALLEL_ACCESS
+  for (size_t i = CompactibleFreeListSpace::IndexSetSize - 1;
+       i > 0;
+       i -= CompactibleFreeListSpace::IndexSetStride) {
+#else // CFLS_LAB_PARALLEL_ACCESS
+  for (size_t i =  CompactibleFreeListSpace::IndexSetStart;
+       i < CompactibleFreeListSpace::IndexSetSize;
+       i += CompactibleFreeListSpace::IndexSetStride) {
+#endif // !CFLS_LAB_PARALLEL_ACCESS
+    assert(_num_blocks[i] >= (size_t)_indexedFreeList[i].count(),
+           "Can't retire more than what we obtained");
+    if (_num_blocks[i] > 0) {
+      size_t num_retire =  _indexedFreeList[i].count();
+      assert(_num_blocks[i] > num_retire, "Should have used at least one");
+      {
+#if CFLS_LAB_PARALLEL_ACCESS
+        MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
+                        Mutex::_no_safepoint_check_flag);
+#endif // CFLS_LAB_PARALLEL_ACCESS
+        // Update globals stats for num_blocks used
+        _global_num_blocks[i] += (_num_blocks[i] - num_retire);
+        _global_num_workers[i]++;
+        assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big");
+        if (num_retire > 0) {
+          _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
+          // Reset this list.
+          _indexedFreeList[i] = FreeList();
+          _indexedFreeList[i].set_size(i);
+        }
+      }
+      if (PrintOldPLAB) {
+        gclog_or_tty->print_cr("%d[%d]: %d/%d/%d",
+                               tid, i, num_retire, _num_blocks[i], (size_t)_blocks_to_claim[i].average());
+      }
+      // Reset stats for next round
+      _num_blocks[i]         = 0;
+    }
+  }
+}
+
+void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
   assert(fl->count() == 0, "Precondition.");
   assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
          "Precondition");
 
-  // We'll try all multiples of word_sz in the indexed set (starting with
-  // word_sz itself), then try getting a big chunk and splitting it.
-  int k = 1;
-  size_t cur_sz = k * word_sz;
-  bool found = false;
-  while (cur_sz < CompactibleFreeListSpace::IndexSetSize && k == 1) {
-    FreeList* gfl = &_indexedFreeList[cur_sz];
-    FreeList fl_for_cur_sz;  // Empty.
-    fl_for_cur_sz.set_size(cur_sz);
-    {
-      MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
-                      Mutex::_no_safepoint_check_flag);
-      if (gfl->count() != 0) {
-        size_t nn = MAX2(n/k, (size_t)1);
-        gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
-        found = true;
+  // We'll try all multiples of word_sz in the indexed set, starting with
+  // word_sz itself and, if CMSSplitIndexedFreeListBlocks, try larger multiples,
+  // then try getting a big chunk and splitting it.
+  {
+    bool found;
+    int  k;
+    size_t cur_sz;
+    for (k = 1, cur_sz = k * word_sz, found = false;
+         (cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
+         (CMSSplitIndexedFreeListBlocks || k <= 1);
+         k++, cur_sz = k * word_sz) {
+      FreeList* gfl = &_indexedFreeList[cur_sz];
+      FreeList fl_for_cur_sz;  // Empty.
+      fl_for_cur_sz.set_size(cur_sz);
+      {
+        MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
+                        Mutex::_no_safepoint_check_flag);
+        if (gfl->count() != 0) {
+          // nn is the number of chunks of size cur_sz that
+          // we'd need to split k-ways each, in order to create
+          // "n" chunks of size word_sz each.
+          const size_t nn = MAX2(n/k, (size_t)1);
+          gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
+          found = true;
+          if (k > 1) {
+            // Update split death stats for the cur_sz-size blocks list:
+            // we increment the split death count by the number of blocks
+            // we just took from the cur_sz-size blocks list and which
+            // we will be splitting below.
+            ssize_t deaths = _indexedFreeList[cur_sz].splitDeaths() +
+                             fl_for_cur_sz.count();
+            _indexedFreeList[cur_sz].set_splitDeaths(deaths);
+          }
+        }
+      }
+      // Now transfer fl_for_cur_sz to fl.  Common case, we hope, is k = 1.
+      if (found) {
+        if (k == 1) {
+          fl->prepend(&fl_for_cur_sz);
+        } else {
+          // Divide each block on fl_for_cur_sz up k ways.
+          FreeChunk* fc;
+          while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
+            // Must do this in reverse order, so that anybody attempting to
+            // access the main chunk sees it as a single free block until we
+            // change it.
+            size_t fc_size = fc->size();
+            for (int i = k-1; i >= 0; i--) {
+              FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
+              ffc->setSize(word_sz);
+              ffc->linkNext(NULL);
+              ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
+              // Above must occur before BOT is updated below.
+              // splitting from the right, fc_size == (k - i + 1) * wordsize
+              _bt.mark_block((HeapWord*)ffc, word_sz);
+              fc_size -= word_sz;
+              _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
+              _bt.verify_single_block((HeapWord*)fc, fc_size);
+              _bt.verify_single_block((HeapWord*)ffc, ffc->size());
+              // Push this on "fl".
+              fl->returnChunkAtHead(ffc);
+            }
+            // TRAP
+            assert(fl->tail()->next() == NULL, "List invariant.");
+          }
+        }
+        // Update birth stats for this block size.
+        size_t num = fl->count();
+        MutexLockerEx x(_indexedFreeListParLocks[word_sz],
+                        Mutex::_no_safepoint_check_flag);
+        ssize_t births = _indexedFreeList[word_sz].splitBirths() + num;
+        _indexedFreeList[word_sz].set_splitBirths(births);
+        return;
       }
     }
-    // Now transfer fl_for_cur_sz to fl.  Common case, we hope, is k = 1.
-    if (found) {
-      if (k == 1) {
-        fl->prepend(&fl_for_cur_sz);
-      } else {
-        // Divide each block on fl_for_cur_sz up k ways.
-        FreeChunk* fc;
-        while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
-          // Must do this in reverse order, so that anybody attempting to
-          // access the main chunk sees it as a single free block until we
-          // change it.
-          size_t fc_size = fc->size();
-          for (int i = k-1; i >= 0; i--) {
-            FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
-            ffc->setSize(word_sz);
-            ffc->linkNext(NULL);
-            ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
-            // Above must occur before BOT is updated below.
-            // splitting from the right, fc_size == (k - i + 1) * wordsize
-            _bt.mark_block((HeapWord*)ffc, word_sz);
-            fc_size -= word_sz;
-            _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
-            _bt.verify_single_block((HeapWord*)fc, fc_size);
-            _bt.verify_single_block((HeapWord*)ffc, ffc->size());
-            // Push this on "fl".
-            fl->returnChunkAtHead(ffc);
-          }
-          // TRAP
-          assert(fl->tail()->next() == NULL, "List invariant.");
-        }
-      }
-      return;
-    }
-    k++; cur_sz = k * word_sz;
   }
   // Otherwise, we'll split a block from the dictionary.
   FreeChunk* fc = NULL;
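Reviewer note: the CFLS_LAB changes in this hunk form a per-block-size feedback loop: get_from_global_pool() refills a worker's local list with roughly _blocks_to_claim[sz].average() blocks (optionally bumped mid-scavenge when CMSOldPLABResizeQuicker sees usage running far ahead of plan), retire() folds each worker's consumption into the shared _global_num_blocks/_global_num_workers tables, and compute_desired_plab_size() then samples blocks-per-worker-per-refill into the weighted average, clamped to [CMSOldPLABMin, CMSOldPLABMax]. A compressed standalone sketch of that loop for a single block size (plain C++, a simple exponential average instead of HotSpot's AdaptiveWeightedAverage; the fields are stand-ins for the new CMSOldPLAB* flags and do not model the quick-reaction bump):

#include <algorithm>
#include <cstdio>

struct PlabSizer {
  double avg_blocks_to_claim;   // analogue of _blocks_to_claim[sz].average()
  double weight;                // smoothing weight, analogue of OldPLABWeight
  size_t min_claim, max_claim;  // analogues of CMSOldPLABMin / CMSOldPLABMax
  size_t num_refills;           // analogue of CMSOldPLABNumRefills

  // How many blocks to grab on one refill of the local free list.
  size_t blocks_to_claim() const {
    return std::max(min_claim,
                    std::min(max_claim, (size_t)avg_blocks_to_claim));
  }

  // End-of-scavenge update: sample blocks-used-per-worker-per-refill into
  // the running average, clamped to the configured range (the shape of
  // compute_desired_plab_size()).
  void end_of_scavenge(size_t total_blocks_used, size_t num_workers) {
    if (num_workers == 0) return;
    double sample = (double)total_blocks_used / (num_workers * num_refills);
    sample = std::max((double)min_claim, std::min((double)max_claim, sample));
    avg_blocks_to_claim =
        (1.0 - weight) * avg_blocks_to_claim + weight * sample;
  }
};

int main() {
  PlabSizer s{50.0, 0.5, 16, 1024, 4};
  std::printf("claim %zu blocks per refill\n", s.blocks_to_claim());
  s.end_of_scavenge(/*blocks used*/ 6400, /*workers*/ 8);  // 200 per refill
  std::printf("after sampling: claim %zu\n", s.blocks_to_claim());
  return 0;
}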
@@ -2723,17 +3065,31 @@
       }
     }
     if (fc == NULL) return;
+    assert((ssize_t)n >= 1, "Control point invariant");
     // Otherwise, split up that block.
-    size_t nn = fc->size() / word_sz;
+    const size_t nn = fc->size() / word_sz;
     n = MIN2(nn, n);
+    assert((ssize_t)n >= 1, "Control point invariant");
     rem = fc->size() - n * word_sz;
     // If there is a remainder, and it's too small, allocate one fewer.
     if (rem > 0 && rem < MinChunkSize) {
       n--; rem += word_sz;
     }
+    // Note that at this point we may have n == 0.
+    assert((ssize_t)n >= 0, "Control point invariant");
+
+    // If n is 0, the chunk fc that was found is not large
+    // enough to leave a viable remainder.  We are unable to
+    // allocate even one block.  Return fc to the
+    // dictionary and return, leaving "fl" empty.
+    if (n == 0) {
+      returnChunkToDictionary(fc);
+      return;
+    }
+
     // First return the remainder, if any.
     // Note that we hold the lock until we decide if we're going to give
-    // back the remainder to the dictionary, since a contending allocator
+    // back the remainder to the dictionary, since a concurrent allocation
     // may otherwise see the heap as empty.  (We're willing to take that
     // hit if the block is a small block.)
     if (rem > 0) {
@@ -2743,18 +3099,16 @@
       rem_fc->linkNext(NULL);
       rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
       // Above must occur before BOT is updated below.
+      assert((ssize_t)n > 0 && prefix_size > 0 && rem_fc > fc, "Error");
       _bt.split_block((HeapWord*)fc, fc->size(), prefix_size);
       if (rem >= IndexSetSize) {
         returnChunkToDictionary(rem_fc);
-        dictionary()->dictCensusUpdate(fc->size(),
-                                       true /*split*/,
-                                       true /*birth*/);
+        dictionary()->dictCensusUpdate(rem, true /*split*/, true /*birth*/);
         rem_fc = NULL;
       }
       // Otherwise, return it to the small list below.
     }
   }
-  //
   if (rem_fc != NULL) {
     MutexLockerEx x(_indexedFreeListParLocks[rem],
                     Mutex::_no_safepoint_check_flag);
@@ -2762,7 +3116,7 @@
     _indexedFreeList[rem].returnChunkAtHead(rem_fc);
     smallSplitBirth(rem);
   }
-
+  assert((ssize_t)n > 0 && fc != NULL, "Consistency");
   // Now do the splitting up.
   // Must do this in reverse order, so that anybody attempting to
   // access the main chunk sees it as a single free block until we
@@ -2792,13 +3146,15 @@
   _bt.verify_single_block((HeapWord*)fc, fc->size());
   fl->returnChunkAtHead(fc);
 
+  assert((ssize_t)n > 0 && (ssize_t)n == fl->count(), "Incorrect number of blocks");
   {
+    // Update the stats for this block size.
     MutexLockerEx x(_indexedFreeListParLocks[word_sz],
                     Mutex::_no_safepoint_check_flag);
-    ssize_t new_births = _indexedFreeList[word_sz].splitBirths() + n;
-    _indexedFreeList[word_sz].set_splitBirths(new_births);
-    ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
-    _indexedFreeList[word_sz].set_surplus(new_surplus);
+    const ssize_t births = _indexedFreeList[word_sz].splitBirths() + n;
+    _indexedFreeList[word_sz].set_splitBirths(births);
+    // ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
+    // _indexedFreeList[word_sz].set_surplus(new_surplus);
   }
 
   // TRAP
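Reviewer note: both paths of par_get_chunk_of_blocks() end in the same carving pattern, and the comments explain why the order matters: pieces are peeled off the tail of the big chunk first ("splitting from the right"), so until the very last step any concurrent observer still sees the original chunk as one coherent free block whose remaining prefix only shrinks. A minimal standalone sketch of that right-to-left split (plain C++, word indices rather than FreeChunk pointers):

#include <cassert>
#include <cstdio>
#include <vector>

// Split one free chunk of 'k * word_sz' words, starting at word index 'fc',
// into k pieces of 'word_sz' words each, peeling from the right. At every
// step the remaining prefix [fc, fc + fc_size) is still one contiguous block,
// which is why the HotSpot code updates the block-offset table in this order.
std::vector<size_t> split_from_right(size_t fc, size_t word_sz, int k) {
  std::vector<size_t> pieces;
  size_t fc_size = (size_t)k * word_sz;     // size of the not-yet-split prefix
  for (int i = k - 1; i >= 0; i--) {
    size_t ffc = fc + (size_t)i * word_sz;  // start of the piece being carved
    pieces.push_back(ffc);
    fc_size -= word_sz;                     // prefix shrinks by one piece
    assert(ffc == fc + fc_size && "carved piece must abut the prefix");
  }
  assert(fc_size == 0 && "everything carved");
  return pieces;
}

int main() {
  for (size_t p : split_from_right(/*fc*/ 4096, /*word_sz*/ 16, /*k*/ 4)) {
    std::printf("piece at %zu\n", p);
  }
  return 0;
}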
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -25,8 +25,6 @@
 // Classes in support of keeping track of promotions into a non-Contiguous
 // space, in this case a CompactibleFreeListSpace.
 
-#define CFLS_LAB_REFILL_STATS 0
-
 // Forward declarations
 class CompactibleFreeListSpace;
 class BlkClosure;
@@ -89,6 +87,9 @@
     displacedHdr = (markOop*)&displacedHdr;
     nextSpoolBlock = NULL;
   }
+
+  void print_on(outputStream* st) const;
+  void print() const { print_on(gclog_or_tty); }
 };
 
 class PromotionInfo VALUE_OBJ_CLASS_SPEC {
@@ -121,7 +122,7 @@
     return _promoHead == NULL;
   }
   void startTrackingPromotions();
-  void stopTrackingPromotions();
+  void stopTrackingPromotions(uint worker_id = 0);
   bool tracking() const          { return _tracking;  }
   void track(PromotedObject* trackOop);      // keep track of a promoted oop
   // The following variant must be used when trackOop is not fully
@@ -161,6 +162,9 @@
     _nextIndex = 0;
 
   }
+
+  void print_on(outputStream* st) const;
+  void print_statistics(uint worker_id) const;
 };
 
 class LinearAllocBlock VALUE_OBJ_CLASS_SPEC {
@@ -243,6 +247,7 @@
   mutable Mutex _freelistLock;
   // locking verifier convenience function
   void assert_locked() const PRODUCT_RETURN;
+  void assert_locked(const Mutex* lock) const PRODUCT_RETURN;
 
   // Linear allocation blocks
   LinearAllocBlock _smallLinearAllocBlock;
@@ -281,13 +286,6 @@
   // Locks protecting the exact lists during par promotion allocation.
   Mutex* _indexedFreeListParLocks[IndexSetSize];
 
-#if CFLS_LAB_REFILL_STATS
-  // Some statistics.
-  jint  _par_get_chunk_from_small;
-  jint  _par_get_chunk_from_large;
-#endif
-
-
   // Attempt to obtain up to "n" blocks of the size "word_sz" (which is
   // required to be smaller than "IndexSetSize".)  If successful,
   // adds them to "fl", which is required to be an empty free list.
@@ -320,7 +318,7 @@
   // Helper function for getChunkFromIndexedFreeList.
   // Replenish the indexed free list for this "size".  Do not take from an
   // underpopulated size.
-  FreeChunk*  getChunkFromIndexedFreeListHelper(size_t size);
+  FreeChunk*  getChunkFromIndexedFreeListHelper(size_t size, bool replenish = true);
 
   // Get a chunk from the indexed free list.  If the indexed free list
   // does not have a free chunk, try to replenish the indexed free list
@@ -430,10 +428,6 @@
   void initialize_sequential_subtasks_for_marking(int n_threads,
          HeapWord* low = NULL);
 
-#if CFLS_LAB_REFILL_STATS
-  void print_par_alloc_stats();
-#endif
-
   // Space enquiries
   size_t used() const;
   size_t free() const;
@@ -617,6 +611,12 @@
   // Do some basic checks on the the free lists.
   void checkFreeListConsistency()         const PRODUCT_RETURN;
 
+  // Printing support
+  void dump_at_safepoint_with_locks(CMSCollector* c, outputStream* st);
+  void print_indexed_free_lists(outputStream* st) const;
+  void print_dictionary_free_lists(outputStream* st) const;
+  void print_promo_info_blocks(outputStream* st) const;
+
   NOT_PRODUCT (
     void initializeIndexedFreeListArrayReturnedBytes();
     size_t sumIndexedFreeListArrayReturnedBytes();
@@ -638,8 +638,9 @@
 
   // Statistics functions
   // Initialize census for lists before the sweep.
-  void beginSweepFLCensus(float sweep_current,
-                          float sweep_estimate);
+  void beginSweepFLCensus(float inter_sweep_current,
+                          float inter_sweep_estimate,
+                          float intra_sweep_estimate);
   // Set the surplus for each of the free lists.
   void setFLSurplus();
   // Set the hint for each of the free lists.
@@ -730,16 +731,17 @@
   FreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
 
   // Initialized from a command-line arg.
-  size_t _blocks_to_claim;
 
-#if CFLS_LAB_REFILL_STATS
-  // Some statistics.
-  int _refills;
-  int _blocksTaken;
-  static int _tot_refills;
-  static int _tot_blocksTaken;
-  static int _next_threshold;
-#endif
+  // Allocation statistics in support of dynamic adjustment of
+  // #blocks to claim per get_from_global_pool() call below.
+  static AdaptiveWeightedAverage
+                 _blocks_to_claim  [CompactibleFreeListSpace::IndexSetSize];
+  static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize];
+  static int    _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
+  size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
+
+  // Internal work method
+  void get_from_global_pool(size_t word_sz, FreeList* fl);
 
 public:
   CFLS_LAB(CompactibleFreeListSpace* cfls);
@@ -748,7 +750,12 @@
   HeapWord* alloc(size_t word_sz);
 
   // Return any unused portions of the buffer to the global pool.
-  void retire();
+  void retire(int tid);
+
+  // Dynamic OldPLABSize sizing
+  static void compute_desired_plab_size();
+  // When the settings are modified from default static initialization
+  static void modify_initialization(size_t n, unsigned wt);
 };
 
 size_t PromotionInfo::refillSize() const {
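
The CFLS_LAB changes above drop the fixed, command-line-derived _blocks_to_claim in favour of per-size adaptive averages: each worker tallies the blocks it takes per size, those tallies are folded into static global counters, and compute_desired_plab_size() samples the per-worker average once per young collection. A minimal standalone C++ sketch of that accumulate-then-average flow (simplified; the weight constant, table size and helper names here are assumptions, not the HotSpot code):

    // Sketch only: models the dynamic OldPLAB sizing flow, not the real classes.
    #include <cstddef>

    struct WeightedAverage {                 // cf. AdaptiveWeightedAverage
      float avg = 0.0f;
      unsigned weight = 50;                  // percent given to a new sample (assumed)
      void sample(float s) { avg = (weight * s + (100 - weight) * avg) / 100.0f; }
    };

    const int IndexSetSize = 257;                    // assumed small-block table size
    WeightedAverage blocks_to_claim[IndexSetSize];   // running estimate per block size
    size_t global_num_blocks[IndexSetSize];          // blocks taken this young GC
    int    global_num_workers[IndexSetSize];         // workers that took any

    // Each worker contributes its per-size tallies when it retires its buffers.
    void accumulate(const size_t num_blocks[IndexSetSize]) {
      for (int i = 1; i < IndexSetSize; i++) {       // sizes 0 and 1 are unused
        if (num_blocks[i] > 0) {
          global_num_blocks[i] += num_blocks[i];
          global_num_workers[i]++;
        }
      }
    }

    // Once per young GC: fold the average demand per worker into the estimate
    // and reset the accumulators for the next epoch.
    void compute_desired_plab_size() {
      for (int i = 1; i < IndexSetSize; i++) {
        if (global_num_workers[i] > 0) {
          blocks_to_claim[i].sample((float)global_num_blocks[i] / global_num_workers[i]);
        }
        global_num_blocks[i]  = 0;
        global_num_workers[i] = 0;
      }
    }

In the patch itself the once-per-collection step is CFLS_LAB::compute_desired_plab_size(), invoked from ParScanThreadStateSet::flush() further below; the per-worker tallies presumably flow in via the new retire(tid).
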
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -253,7 +253,6 @@
   }
 }
 
-
 void ConcurrentMarkSweepGeneration::ref_processor_init() {
   assert(collector() != NULL, "no collector");
   collector()->ref_processor_init();
@@ -341,6 +340,14 @@
   _icms_duty_cycle = CMSIncrementalDutyCycle;
 }
 
+double CMSStats::cms_free_adjustment_factor(size_t free) const {
+  // TBD: CR 6909490
+  return 1.0;
+}
+
+void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) {
+}
+
 // If promotion failure handling is on use
 // the padded average size of the promotion for each
 // young generation collection.
@@ -361,7 +368,11 @@
 
     // Adjust by the safety factor.
     double cms_free_dbl = (double)cms_free;
-    cms_free_dbl = cms_free_dbl * (100.0 - CMSIncrementalSafetyFactor) / 100.0;
+    double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0;
+    // Apply a further correction factor which tries to adjust
+    // for recent occurrence of concurrent mode failures.
+    cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free);
+    cms_free_dbl = cms_free_dbl * cms_adjustment;
 
     if (PrintGCDetails && Verbose) {
       gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free "
@@ -395,6 +406,8 @@
   // late.
   double work = cms_duration() + gc0_period();
   double deadline = time_until_cms_gen_full();
+  // If a concurrent mode failure occurred recently, we want to be
+  // more conservative and halve our expected time_until_cms_gen_full()
   if (work > deadline) {
     if (Verbose && PrintGCDetails) {
       gclog_or_tty->print(
@@ -556,7 +569,8 @@
   _should_unload_classes(false),
   _concurrent_cycles_since_last_unload(0),
   _roots_scanning_options(0),
-  _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
+  _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
+  _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
 {
   if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
     ExplicitGCInvokesConcurrent = true;
@@ -773,7 +787,7 @@
   NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
   _gc_counters = new CollectorCounters("CMS", 1);
   _completed_initialization = true;
-  _sweep_timer.start();  // start of time
+  _inter_sweep_timer.start();  // start of time
 }
 
 const char* ConcurrentMarkSweepGeneration::name() const {
@@ -900,6 +914,14 @@
   return result;
 }
 
+// At a promotion failure dump information on block layout in heap
+// (cms old generation).
+void ConcurrentMarkSweepGeneration::promotion_failure_occurred() {
+  if (CMSDumpAtPromotionFailure) {
+    cmsSpace()->dump_at_safepoint_with_locks(collector(), gclog_or_tty);
+  }
+}
+
 CompactibleSpace*
 ConcurrentMarkSweepGeneration::first_compaction_space() const {
   return _cmsSpace;
@@ -1368,12 +1390,7 @@
 ConcurrentMarkSweepGeneration::
 par_promote_alloc_done(int thread_num) {
   CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
-  ps->lab.retire();
-#if CFLS_LAB_REFILL_STATS
-  if (thread_num == 0) {
-    _cmsSpace->print_par_alloc_stats();
-  }
-#endif
+  ps->lab.retire(thread_num);
 }
 
 void
@@ -1974,11 +1991,14 @@
   // We must adjust the allocation statistics being maintained
   // in the free list space. We do so by reading and clearing
   // the sweep timer and updating the block flux rate estimates below.
-  assert(_sweep_timer.is_active(), "We should never see the timer inactive");
-  _sweep_timer.stop();
-  // Note that we do not use this sample to update the _sweep_estimate.
-  _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
-                                          _sweep_estimate.padded_average());
+  assert(!_intra_sweep_timer.is_active(), "_intra_sweep_timer should be inactive");
+  if (_inter_sweep_timer.is_active()) {
+    _inter_sweep_timer.stop();
+    // Note that we do not use this sample to update the _inter_sweep_estimate.
+    _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
+                                            _inter_sweep_estimate.padded_average(),
+                                            _intra_sweep_estimate.padded_average());
+  }
 
   GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
     ref_processor(), clear_all_soft_refs);
@@ -2015,10 +2035,10 @@
   }
 
   // Adjust the per-size allocation stats for the next epoch.
-  _cmsGen->cmsSpace()->endSweepFLCensus(sweepCount() /* fake */);
-  // Restart the "sweep timer" for next epoch.
-  _sweep_timer.reset();
-  _sweep_timer.start();
+  _cmsGen->cmsSpace()->endSweepFLCensus(sweep_count() /* fake */);
+  // Restart the "inter sweep timer" for the next epoch.
+  _inter_sweep_timer.reset();
+  _inter_sweep_timer.start();
 
   // Sample collection pause time and reset for collection interval.
   if (UseAdaptiveSizePolicy) {
@@ -2676,7 +2696,7 @@
   // Also reset promotion tracking in par gc thread states.
   if (ParallelGCThreads > 0) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
-      _par_gc_thread_states[i]->promo.stopTrackingPromotions();
+      _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
     }
   }
 }
@@ -2771,7 +2791,7 @@
   bool do_bit(size_t offset) {
     HeapWord* addr = _marks->offsetToHeapWord(offset);
     if (!_marks->isMarked(addr)) {
-      oop(addr)->print();
+      oop(addr)->print_on(gclog_or_tty);
       gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
       _failed = true;
     }
@@ -2820,7 +2840,7 @@
   // Clear any marks from a previous round
   verification_mark_bm()->clear_all();
   assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
-  assert(overflow_list_is_empty(), "overflow list should be empty");
+  verify_work_stacks_empty();
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   gch->ensure_parsability(false);  // fill TLABs, but no need to retire them
@@ -2893,8 +2913,8 @@
   verification_mark_bm()->iterate(&vcl);
   if (vcl.failed()) {
     gclog_or_tty->print("Verification failed");
-    Universe::heap()->print();
-    fatal(" ... aborting");
+    Universe::heap()->print_on(gclog_or_tty);
+    fatal("CMS: failed marking verification after remark");
   }
 }
 
@@ -3314,7 +3334,7 @@
     Universe::heap()->barrier_set()->resize_covered_region(mr);
     // Hmmmm... why doesn't CFLS::set_end verify locking?
     // This is quite ugly; FIX ME XXX
-    _cmsSpace->assert_locked();
+    _cmsSpace->assert_locked(freelistLock());
     _cmsSpace->set_end((HeapWord*)_virtual_space.high());
 
     // update the space and generation capacity counters
@@ -5868,9 +5888,9 @@
   check_correct_thread_executing();
   verify_work_stacks_empty();
   verify_overflow_empty();
-  incrementSweepCount();
-  _sweep_timer.stop();
-  _sweep_estimate.sample(_sweep_timer.seconds());
+  increment_sweep_count();
+  _inter_sweep_timer.stop();
+  _inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
   size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free());
 
   // PermGen verification support: If perm gen sweeping is disabled in
@@ -5893,6 +5913,9 @@
     }
   }
 
+  assert(!_intra_sweep_timer.is_active(), "Should not be active");
+  _intra_sweep_timer.reset();
+  _intra_sweep_timer.start();
   if (asynch) {
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails);
@@ -5937,8 +5960,11 @@
   verify_work_stacks_empty();
   verify_overflow_empty();
 
-  _sweep_timer.reset();
-  _sweep_timer.start();
+  _intra_sweep_timer.stop();
+  _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
+
+  _inter_sweep_timer.reset();
+  _inter_sweep_timer.start();
 
   update_time_of_last_gc(os::javaTimeMillis());
 
@@ -5981,11 +6007,11 @@
 // FIX ME!!! Looks like this belongs in CFLSpace, with
 // CMSGen merely delegating to it.
 void ConcurrentMarkSweepGeneration::setNearLargestChunk() {
-  double nearLargestPercent = 0.999;
+  double nearLargestPercent = FLSLargestBlockCoalesceProximity;
   HeapWord*  minAddr        = _cmsSpace->bottom();
   HeapWord*  largestAddr    =
     (HeapWord*) _cmsSpace->dictionary()->findLargestDict();
-  if (largestAddr == 0) {
+  if (largestAddr == NULL) {
     // The dictionary appears to be empty.  In this case
     // try to coalesce at the end of the heap.
     largestAddr = _cmsSpace->end();
@@ -5993,6 +6019,13 @@
   size_t largestOffset     = pointer_delta(largestAddr, minAddr);
   size_t nearLargestOffset =
     (size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize;
+  if (PrintFLSStatistics != 0) {
+    gclog_or_tty->print_cr(
+      "CMS: Large Block: " PTR_FORMAT ";"
+      " Proximity: " PTR_FORMAT " -> " PTR_FORMAT,
+      largestAddr,
+      _cmsSpace->nearLargestChunk(), minAddr + nearLargestOffset);
+  }
   _cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset);
 }
 
@@ -6072,9 +6105,11 @@
   assert_lock_strong(gen->freelistLock());
   assert_lock_strong(bitMapLock());
 
-  assert(!_sweep_timer.is_active(), "Was switched off in an outer context");
-  gen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
-                                      _sweep_estimate.padded_average());
+  assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context");
+  assert(_intra_sweep_timer.is_active(),  "Was switched on  in an outer context");
+  gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
+                                      _inter_sweep_estimate.padded_average(),
+                                      _intra_sweep_estimate.padded_average());
   gen->setNearLargestChunk();
 
   {
@@ -6087,7 +6122,7 @@
     // end-of-sweep-census below will be off by a little bit.
   }
   gen->cmsSpace()->sweep_completed();
-  gen->cmsSpace()->endSweepFLCensus(sweepCount());
+  gen->cmsSpace()->endSweepFLCensus(sweep_count());
   if (should_unload_classes()) {                // unloaded classes this cycle,
     _concurrent_cycles_since_last_unload = 0;   // ... reset count
   } else {                                      // did not unload classes,
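
The renaming above splits the old single _sweep_timer into an inter-sweep timer (time between sweeps) and an intra-sweep timer (time spent in a sweep), each feeding its own padded average that beginSweepFLCensus() now consumes. A compile-only sketch of that alternation, with simplified stand-ins (the weight and class names are assumptions, and the padding-by-deviation of AdaptivePaddedAverage is omitted):

    #include <chrono>

    struct Timer {
      std::chrono::steady_clock::time_point t0;
      double elapsed = 0.0;
      bool active = false;
      void start() { t0 = std::chrono::steady_clock::now(); active = true; }
      void stop()  {
        elapsed = std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count();
        active = false;
      }
      void reset() { elapsed = 0.0; }
    };

    struct PaddedAverage {                    // stand-in for AdaptivePaddedAverage
      double avg = 0.0;
      void sample(double s) { avg = 0.75 * avg + 0.25 * s; }   // assumed weight
      double padded_average() const { return avg; }
    };

    Timer inter_sweep_timer, intra_sweep_timer;
    PaddedAverage inter_sweep_estimate, intra_sweep_estimate;

    void sweep_begins() {                     // cf. the start of CMSCollector::sweep()
      inter_sweep_timer.stop();               // time since the previous sweep ended
      inter_sweep_estimate.sample(inter_sweep_timer.elapsed);
      intra_sweep_timer.reset();
      intra_sweep_timer.start();
      // beginSweepFLCensus(inter_sweep_timer.elapsed,
      //                    inter_sweep_estimate.padded_average(),
      //                    intra_sweep_estimate.padded_average());
    }

    void sweep_ends() {
      intra_sweep_timer.stop();               // time spent inside this sweep
      intra_sweep_estimate.sample(intra_sweep_timer.elapsed);
      inter_sweep_timer.reset();
      inter_sweep_timer.start();              // now timing until the next sweep
    }

The extra intra-sweep estimate is what lets compute_desired() in allocationStats.hpp (further below) extrapolate demand over the sweep itself when CMSExtrapolateSweep is set.
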
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -355,6 +355,11 @@
                                              unsigned int new_duty_cycle);
   unsigned int icms_update_duty_cycle_impl();
 
+  // In support of adjusting of cms trigger ratios based on history
+  // of concurrent mode failure.
+  double cms_free_adjustment_factor(size_t free) const;
+  void   adjust_cms_free_adjustment_factor(bool fail, size_t free);
+
  public:
   CMSStats(ConcurrentMarkSweepGeneration* cms_gen,
            unsigned int alpha = CMSExpAvgFactor);
@@ -570,8 +575,11 @@
   // appropriately.
   void check_gc_time_limit();
   // XXX Move these to CMSStats ??? FIX ME !!!
-  elapsedTimer _sweep_timer;
-  AdaptivePaddedAverage _sweep_estimate;
+  elapsedTimer _inter_sweep_timer;   // time between sweeps
+  elapsedTimer _intra_sweep_timer;   // time _in_ sweeps
+  // padded decaying average estimates of the above
+  AdaptivePaddedAverage _inter_sweep_estimate;
+  AdaptivePaddedAverage _intra_sweep_estimate;
 
  protected:
   ConcurrentMarkSweepGeneration* _cmsGen;  // old gen (CMS)
@@ -625,6 +633,7 @@
   // . _collectorState <= Idling ==  post-sweep && pre-mark
   // . _collectorState in (Idling, Sweeping) == {initial,final}marking ||
   //                                            precleaning || abortablePrecleanb
+ public:
   enum CollectorState {
     Resizing            = 0,
     Resetting           = 1,
@@ -636,6 +645,7 @@
     FinalMarking        = 7,
     Sweeping            = 8
   };
+ protected:
   static CollectorState _collectorState;
 
   // State related to prologue/epilogue invocation for my generations
@@ -655,7 +665,7 @@
 
   int    _numYields;
   size_t _numDirtyCards;
-  uint   _sweepCount;
+  size_t _sweep_count;
   // number of full gc's since the last concurrent gc.
   uint   _full_gcs_since_conc_gc;
 
@@ -905,7 +915,7 @@
 
   // Check that the currently executing thread is the expected
   // one (foreground collector or background collector).
-  void check_correct_thread_executing()        PRODUCT_RETURN;
+  static void check_correct_thread_executing() PRODUCT_RETURN;
   // XXXPERM void print_statistics()           PRODUCT_RETURN;
 
   bool is_cms_reachable(HeapWord* addr);
@@ -930,8 +940,8 @@
   static void set_foregroundGCShouldWait(bool v) { _foregroundGCShouldWait = v; }
   static bool foregroundGCIsActive() { return _foregroundGCIsActive; }
   static void set_foregroundGCIsActive(bool v) { _foregroundGCIsActive = v; }
-  uint  sweepCount() const             { return _sweepCount; }
-  void incrementSweepCount()           { _sweepCount++; }
+  size_t sweep_count() const             { return _sweep_count; }
+  void   increment_sweep_count()         { _sweep_count++; }
 
   // Timers/stats for gc scheduling and incremental mode pacing.
   CMSStats& stats() { return _stats; }
@@ -1165,6 +1175,11 @@
   virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
     bool younger_handles_promotion_failure) const;
 
+  // Inform this (non-young) generation that a promotion failure was
+  // encountered during a collection of a younger generation that
+  // promotes into this generation.
+  virtual void promotion_failure_occurred();
+
   bool should_collect(bool full, size_t size, bool tlab);
   virtual bool should_concurrent_collect() const;
   virtual bool is_too_full() const;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -55,7 +55,8 @@
   virtual void       dictCensusUpdate(size_t size, bool split, bool birth) = 0;
   virtual bool       coalDictOverPopulated(size_t size) = 0;
   virtual void       beginSweepDictCensus(double coalSurplusPercent,
-                       float sweep_current, float sweep_ewstimate) = 0;
+                       float inter_sweep_current, float inter_sweep_estimate,
+                       float intra_sweep_current) = 0;
   virtual void       endSweepDictCensus(double splitSurplusPercent) = 0;
   virtual FreeChunk* findLargestDict() const = 0;
   // verify that the given chunk is in the dictionary.
@@ -79,6 +80,7 @@
   }
 
   virtual void       printDictCensus() const = 0;
+  virtual void       print_free_lists(outputStream* st) const = 0;
 
   virtual void       verify()         const = 0;
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -67,3 +67,8 @@
   }
 }
 #endif
+
+void FreeChunk::print_on(outputStream* st) {
+  st->print_cr("Next: " PTR_FORMAT " Prev: " PTR_FORMAT " %s",
+    next(), prev(), cantCoalesce() ? "[can't coalesce]" : "");
+}
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -129,6 +129,8 @@
   void verifyList()         const PRODUCT_RETURN;
   void mangleAllocated(size_t size) PRODUCT_RETURN;
   void mangleFreed(size_t size)     PRODUCT_RETURN;
+
+  void print_on(outputStream* st);
 };
 
 // Alignment helpers etc.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -81,8 +81,8 @@
   set_hint(hint);
 }
 
-void FreeList::init_statistics() {
-  _allocation_stats.initialize();
+void FreeList::init_statistics(bool split_birth) {
+  _allocation_stats.initialize(split_birth);
 }
 
 FreeChunk* FreeList::getChunkAtHead() {
@@ -292,14 +292,31 @@
 }
 
 #ifndef PRODUCT
+void FreeList::verify_stats() const {
+  // The +1 of the LH comparand is to allow some "looseness" in
+  // checking: we usually call this interface when adding a block
+  // and we'll subsequently update the stats; we cannot update the
+  // stats beforehand because in the case of the large-block BT
+  // dictionary for example, this might be the first block and
+  // in that case there would be no place that we could record
+  // the stats (which are kept in the block itself).
+  assert(_allocation_stats.prevSweep() + _allocation_stats.splitBirths() + 1   // Total Stock + 1
+          >= _allocation_stats.splitDeaths() + (ssize_t)count(), "Conservation Principle");
+}
+
 void FreeList::assert_proper_lock_protection_work() const {
-#ifdef ASSERT
-  if (_protecting_lock != NULL &&
-      SharedHeap::heap()->n_par_threads() > 0) {
-    // Should become an assert.
-    guarantee(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+  assert(_protecting_lock != NULL, "Don't call this directly");
+  assert(ParallelGCThreads > 0, "Don't call this directly");
+  Thread* thr = Thread::current();
+  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
+    // assert that we are holding the freelist lock
+  } else if (thr->is_GC_task_thread()) {
+    assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+  } else if (thr->is_Java_thread()) {
+    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
+  } else {
+    ShouldNotReachHere();  // unaccounted thread type?
   }
-#endif
 }
 #endif
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -35,18 +35,26 @@
 // for that implementation.
 
 class Mutex;
+class TreeList;
 
 class FreeList VALUE_OBJ_CLASS_SPEC {
   friend class CompactibleFreeListSpace;
   friend class VMStructs;
-  friend class printTreeCensusClosure;
-  FreeChunk*    _head;          // List of free chunks
+  friend class PrintTreeCensusClosure;
+
+ protected:
+  TreeList* _parent;
+  TreeList* _left;
+  TreeList* _right;
+
+ private:
+  FreeChunk*    _head;          // Head of list of free chunks
   FreeChunk*    _tail;          // Tail of list of free chunks
-  size_t        _size;          // Size in Heap words of each chunks
+  size_t        _size;          // Size in Heap words of each chunk
   ssize_t       _count;         // Number of entries in list
   size_t        _hint;          // next larger size list with a positive surplus
 
-  AllocationStats _allocation_stats;            // statistics for smart allocation
+  AllocationStats _allocation_stats; // allocation-related statistics
 
 #ifdef ASSERT
   Mutex*        _protecting_lock;
@@ -63,9 +71,12 @@
 
   // Initialize the allocation statistics.
  protected:
-  void init_statistics();
+  void init_statistics(bool split_birth = false);
   void set_count(ssize_t v) { _count = v;}
-  void increment_count()    { _count++; }
+  void increment_count()    {
+    _count++;
+  }
+
   void decrement_count() {
     _count--;
     assert(_count >= 0, "Count should not be negative");
@@ -167,11 +178,13 @@
     _allocation_stats.set_desired(v);
   }
   void compute_desired(float inter_sweep_current,
-                       float inter_sweep_estimate) {
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
     assert_proper_lock_protection();
     _allocation_stats.compute_desired(_count,
                                       inter_sweep_current,
-                                      inter_sweep_estimate);
+                                      inter_sweep_estimate,
+                                      intra_sweep_estimate);
   }
   ssize_t coalDesired() const {
     return _allocation_stats.coalDesired();
@@ -306,6 +319,9 @@
   // found.  Return NULL if "fc" is not found.
   bool verifyChunkInFreeLists(FreeChunk* fc) const;
 
+  // Stats verification
+  void verify_stats() const PRODUCT_RETURN;
+
   // Printing support
   static void print_labels_on(outputStream* st, const char* c);
   void print_on(outputStream* st, const char* c = NULL) const;
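
The new FreeList::verify_stats() above asserts a simple conservation law for each block size: the stock recorded at the end of the previous sweep plus split births (with +1 of slack for a block being added before its birth is recorded) must cover the split deaths plus the blocks currently on the list. A tiny standalone illustration with made-up numbers (field names are assumptions):

    #include <cassert>

    struct ListStats {
      long prev_sweep   = 0;   // block count at the end of the previous sweep
      long split_births = 0;   // blocks gained at this size by splitting larger ones
      long split_deaths = 0;   // blocks lost from this size by splitting
      long count        = 0;   // blocks currently on the list
      void verify() const {
        // "Total Stock + 1" on the left, as in FreeList::verify_stats().
        assert(prev_sweep + split_births + 1 >= split_deaths + count &&
               "Conservation Principle violated");
      }
    };

    int main() {
      ListStats s{ /*prev_sweep*/ 10, /*split_births*/ 4, /*split_deaths*/ 3, /*count*/ 12 };
      s.verify();              // 10 + 4 + 1 >= 3 + 12 holds
      return 0;
    }
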
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -1441,6 +1441,7 @@
 }
 
 jint G1CollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
   os::enable_vtime();
 
   // Necessary to satisfy locking discipline assertions.
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -1007,6 +1007,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return true;
+  }
+
   bool is_in_young(oop obj) {
     HeapRegion* hr = heap_region_containing(obj);
     return hr != NULL && hr->is_young();
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -73,7 +73,12 @@
 
 void PtrQueue::locking_enqueue_completed_buffer(void** buf) {
   assert(_lock->owned_by_self(), "Required.");
+
+  // We have to unlock _lock (which may be Shared_DirtyCardQ_lock) before
+  // we acquire DirtyCardQ_CBL_mon inside enqueue_complete_buffer, as they
+  // have the same rank and we may get the "possible deadlock" message.
   _lock->unlock();
+
   qset()->enqueue_complete_buffer(buf);
   // We must relock only because the caller will unlock, for the normal
   // case.
@@ -140,7 +145,36 @@
   // holding the lock if there is one).
   if (_buf != NULL) {
     if (_lock) {
-      locking_enqueue_completed_buffer(_buf);
+      assert(_lock->owned_by_self(), "Required.");
+
+      // The current PtrQ may be the shared dirty card queue and
+      // may be manipulated by more than one worker thread
+      // during a pause. Since enqueuing the completed buffer
+      // unlocks the Shared_DirtyCardQ_lock, more than one worker
+      // thread can 'race' on reading the shared queue attributes
+      // (_buf and _index), and multiple threads can call into this
+      // routine for the same buffer. This would cause the completed
+      // buffer to be added to the CBL multiple times.
+
+      // We "claim" the current buffer by caching the value of _buf in
+      // a local and clearing the field while holding _lock. When
+      // _lock is released (while enqueueing the completed buffer),
+      // any thread that acquires _lock will skip this code,
+      // preventing a subsequent multiple enqueue, and will
+      // install a newly allocated buffer below.
+
+      void** buf = _buf;   // local pointer to completed buffer
+      _buf = NULL;         // clear shared _buf field
+
+      locking_enqueue_completed_buffer(buf);  // enqueue completed buffer
+
+      // While the current thread was enqueuing the buffer, another thread
+      // may have allocated a new buffer and inserted it into this pointer
+      // queue. If that happened, we just return so that the current
+      // thread doesn't overwrite the buffer allocated by the other thread
+      // and potentially lose some dirtied cards.
+
+      if (_buf != NULL) return;
     } else {
       if (qset()->process_or_enqueue_complete_buffer(_buf)) {
         // Recycle the buffer. No allocation.
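
The new comment block above describes a claim protocol: because enqueuing a completed buffer has to drop Shared_DirtyCardQ_lock, a second worker could otherwise enqueue the same buffer again. A self-contained C++ sketch of the idea (std::mutex and the helper names stand in for the HotSpot monitors; these are not the real G1 classes):

    #include <mutex>
    #include <vector>

    struct SharedQueue {
      std::mutex lock;                    // plays the role of Shared_DirtyCardQ_lock
      std::mutex cbl_mon;                 // plays the role of DirtyCardQ_CBL_mon
      void** buf = nullptr;               // shared completed-buffer pointer
      std::vector<void**> completed;      // stands in for the completed-buffer list

      void enqueue_completed(void** b) {  // cf. enqueue_complete_buffer
        std::lock_guard<std::mutex> g(cbl_mon);
        completed.push_back(b);
      }

      void handle_zero_index(void** fresh_buffer) {
        lock.lock();
        if (buf != nullptr) {
          void** claimed = buf;           // claim the completed buffer...
          buf = nullptr;                  // ...and clear the shared field first
          lock.unlock();                  // drop 'lock' before taking 'cbl_mon'
          enqueue_completed(claimed);
          lock.lock();
          if (buf != nullptr) {           // another thread installed a new buffer
            lock.unlock();                // while we were enqueuing; keep theirs
            return;
          }
        }
        buf = fresh_buffer;               // install a new empty buffer
        lock.unlock();
      }
    };

Any racing thread that takes the lock after the claim sees buf == NULL, skips the enqueue, and simply installs a fresh buffer, which is the behaviour the patched PtrQueue::handle_zero_index() aims for.
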
--- a/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-/*
- * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-void PtrQueue::handle_zero_index() {
-  assert(0 == _index, "Precondition.");
-  // This thread records the full buffer and allocates a new one (while
-  // holding the lock if there is one).
-  void** buf = _buf;
-  _buf = qset()->allocate_buffer();
-  _sz = qset()->buffer_size();
-  _index = _sz;
-  assert(0 <= _index && _index <= _sz, "Invariant.");
-  if (buf != NULL) {
-    if (_lock) {
-      locking_enqueue_completed_buffer(buf);
-    } else {
-      qset()->enqueue_complete_buffer(buf);
-    }
-  }
-}
--- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Wed Jan 20 12:54:25 2010 -0800
@@ -221,6 +221,7 @@
 freeList.cpp                            globals.hpp
 freeList.cpp                            mutex.hpp
 freeList.cpp                            sharedHeap.hpp
+freeList.cpp                            vmThread.hpp
 
 freeList.hpp                            allocationStats.hpp
 
--- a/src/share/vm/gc_implementation/includeDB_gc_serial	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_serial	Wed Jan 20 12:54:25 2010 -0800
@@ -71,6 +71,7 @@
 gcUtil.hpp                              allocation.hpp
 gcUtil.hpp                              debug.hpp
 gcUtil.hpp                              globalDefinitions.hpp
+gcUtil.hpp                              ostream.hpp
 gcUtil.hpp				timer.hpp
 
 generationCounters.cpp                  generationCounters.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -50,6 +50,7 @@
                       work_queue_set_, &term_),
   _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
   _keep_alive_closure(&_scan_weak_ref_closure),
+  _promotion_failure_size(0),
   _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
   _strong_roots_time(0.0), _term_time(0.0)
 {
@@ -249,6 +250,16 @@
   }
 }
 
+void ParScanThreadState::print_and_clear_promotion_failure_size() {
+  if (_promotion_failure_size != 0) {
+    if (PrintPromotionFailure) {
+      gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ",
+        _thread_num, _promotion_failure_size);
+    }
+    _promotion_failure_size = 0;
+  }
+}
+
 class ParScanThreadStateSet: private ResourceArray {
 public:
   // Initializes states for the specified number of threads;
@@ -260,11 +271,11 @@
                         GrowableArray<oop>**    overflow_stacks_,
                         size_t                  desired_plab_sz,
                         ParallelTaskTerminator& term);
-  inline ParScanThreadState& thread_sate(int i);
+  inline ParScanThreadState& thread_state(int i);
   int pushes() { return _pushes; }
   int pops()   { return _pops; }
   int steals() { return _steals; }
-  void reset();
+  void reset(bool promotion_failed);
   void flush();
 private:
   ParallelTaskTerminator& _term;
@@ -295,22 +306,31 @@
   }
 }
 
-inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i)
+inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i)
 {
   assert(i >= 0 && i < length(), "sanity check!");
   return ((ParScanThreadState*)_data)[i];
 }
 
 
-void ParScanThreadStateSet::reset()
+void ParScanThreadStateSet::reset(bool promotion_failed)
 {
   _term.reset_for_reuse();
+  if (promotion_failed) {
+    for (int i = 0; i < length(); ++i) {
+      thread_state(i).print_and_clear_promotion_failure_size();
+    }
+  }
 }
 
 void ParScanThreadStateSet::flush()
 {
+  // Work in this loop should be kept as lightweight as
+  // possible since this might otherwise become a bottleneck
+  // to scaling. If heavy-weight work is ever added to this
+  // loop, consider parallelizing it across the worker threads.
   for (int i = 0; i < length(); ++i) {
-    ParScanThreadState& par_scan_state = thread_sate(i);
+    ParScanThreadState& par_scan_state = thread_state(i);
 
     // Flush stats related to To-space PLAB activity and
     // retire the last buffer.
@@ -362,6 +382,14 @@
       }
     }
   }
+  if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
+    // We need to call this even when ResizeOldPLAB is disabled
+    // so as to avoid breaking some asserts. While we may be able
+    // to avoid this by reorganizing the code a bit, I am loath
+    // to do that unless we find cases where ergo leads to bad
+    // performance.
+    CFLS_LAB::compute_desired_plab_size();
+  }
 }
 
 ParScanClosure::ParScanClosure(ParNewGeneration* g,
@@ -475,7 +503,7 @@
 
   Generation* old_gen = gch->next_gen(_gen);
 
-  ParScanThreadState& par_scan_state = _state_set->thread_sate(i);
+  ParScanThreadState& par_scan_state = _state_set->thread_state(i);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
 
   par_scan_state.start_strong_roots();
@@ -659,7 +687,7 @@
 {
   ResourceMark rm;
   HandleMark hm;
-  ParScanThreadState& par_scan_state = _state_set.thread_sate(i);
+  ParScanThreadState& par_scan_state = _state_set.thread_state(i);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
   _task.work(i, par_scan_state.is_alive_closure(),
              par_scan_state.keep_alive_closure(),
@@ -693,7 +721,7 @@
   ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
                                  _generation.reserved().end(), _state_set);
   workers->run_task(&rp_task);
-  _state_set.reset();
+  _state_set.reset(_generation.promotion_failed());
 }
 
 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
@@ -813,7 +841,7 @@
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
   }
-  thread_state_set.reset();
+  thread_state_set.reset(promotion_failed());
 
   if (PAR_STATS_ENABLED && ParallelGCVerbose) {
     gclog_or_tty->print("Thread totals:\n"
@@ -882,6 +910,8 @@
     swap_spaces();  // Make life simpler for CMS || rescan; see 6483690.
     from()->set_next_compaction_space(to());
     gch->set_incremental_collection_will_fail();
+    // Inform the next generation that a promotion failure occurred.
+    _next_gen->promotion_failure_occurred();
 
     // Reset the PromotionFailureALot counters.
     NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
@@ -1029,6 +1059,8 @@
       new_obj = old;
 
       preserve_mark_if_necessary(old, m);
+      // Log the size of the maiden promotion failure
+      par_scan_state->log_promotion_failure(sz);
     }
 
     old->forward_to(new_obj);
@@ -1150,6 +1182,8 @@
       failed_to_promote = true;
 
       preserve_mark_if_necessary(old, m);
+      // Log the size of the maiden promotion failure
+      par_scan_state->log_promotion_failure(sz);
     }
   } else {
     // Is in to-space; do copying ourselves.
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -97,6 +97,9 @@
   int _pushes, _pops, _steals, _steal_attempts, _term_attempts;
   int _overflow_pushes, _overflow_refills, _overflow_refill_objs;
 
+  // Stats for promotion failure
+  size_t _promotion_failure_size;
+
   // Timing numbers.
   double _start;
   double _start_strong_roots;
@@ -169,6 +172,15 @@
   // Undo the most recent allocation ("obj", of "word_sz").
   void undo_alloc_in_to_space(HeapWord* obj, size_t word_sz);
 
+  // Promotion failure stats
+  size_t promotion_failure_size() { return _promotion_failure_size; }
+  void log_promotion_failure(size_t sz) {
+    if (_promotion_failure_size == 0) {
+      _promotion_failure_size = sz;
+    }
+  }
+  void print_and_clear_promotion_failure_size();
+
   int pushes() { return _pushes; }
   int pops()   { return _pops; }
   int steals() { return _steals; }
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint ParallelScavengeHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   // Cannot be initialized until after the flags are parsed
   GenerationSizer flag_parser;
 
@@ -717,10 +719,6 @@
   return young_gen()->allocate(size, true);
 }
 
-void ParallelScavengeHeap::fill_all_tlabs(bool retire) {
-  CollectedHeap::fill_all_tlabs(retire);
-}
-
 void ParallelScavengeHeap::accumulate_statistics_all_tlabs() {
   CollectedHeap::accumulate_statistics_all_tlabs();
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -54,7 +54,6 @@
  protected:
   static inline size_t total_invocations();
   HeapWord* allocate_new_tlab(size_t size);
-  void fill_all_tlabs(bool retire);
 
  public:
   ParallelScavengeHeap() : CollectedHeap() {
@@ -191,6 +190,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return false;
+  }
+
   // Return true if we don't we need a store barrier for
   // initializing stores to an object at this address.
   virtual bool can_elide_initializing_store_barrier(oop new_obj);
--- a/src/share/vm/gc_implementation/shared/allocationStats.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/shared/allocationStats.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -31,7 +31,7 @@
   // beginning of this sweep:
   //   Count(end_last_sweep) - Count(start_this_sweep)
   //     + splitBirths(between) - splitDeaths(between)
-  // The above number divided by the time since the start [END???] of the
+  // The above number divided by the time since the end of the
   // previous sweep gives us a time rate of demand for blocks
   // of this size. We compute a padded average of this rate as
   // our current estimate for the time rate of demand for blocks
@@ -41,7 +41,7 @@
   // estimates.
   AdaptivePaddedAverage _demand_rate_estimate;
 
-  ssize_t     _desired;          // Estimate computed as described above
+  ssize_t     _desired;         // Demand estimate computed as described above
   ssize_t     _coalDesired;     // desired +/- small-percent for tuning coalescing
 
   ssize_t     _surplus;         // count - (desired +/- small-percent),
@@ -53,9 +53,9 @@
   ssize_t     _coalDeaths;      // loss from coalescing
   ssize_t     _splitBirths;     // additional chunks from splitting
   ssize_t     _splitDeaths;     // loss from splitting
-  size_t     _returnedBytes;    // number of bytes returned to list.
+  size_t      _returnedBytes;   // number of bytes returned to list.
  public:
-  void initialize() {
+  void initialize(bool split_birth = false) {
     AdaptivePaddedAverage* dummy =
       new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight,
                                                          CMS_FLSPadding);
@@ -67,7 +67,7 @@
     _beforeSweep = 0;
     _coalBirths = 0;
     _coalDeaths = 0;
-    _splitBirths = 0;
+    _splitBirths = split_birth ? 1 : 0;
     _splitDeaths = 0;
     _returnedBytes = 0;
   }
@@ -75,10 +75,12 @@
   AllocationStats() {
     initialize();
   }
+
   // The rate estimate is in blocks per second.
   void compute_desired(size_t count,
                        float inter_sweep_current,
-                       float inter_sweep_estimate) {
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
     // If the latest inter-sweep time is below our granularity
     // of measurement, we may call in here with
     // inter_sweep_current == 0. However, even for suitably small
@@ -88,12 +90,31 @@
     // vulnerable to noisy glitches. In such cases, we
     // ignore the current sample and use currently available
     // historical estimates.
+    // XXX NEEDS TO BE FIXED
+    // assert(prevSweep() + splitBirths() >= splitDeaths() + (ssize_t)count, "Conservation Principle");
+    //     ^^^^^^^^^^^^^^^^^^^^^^^^^^^    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    //     "Total Stock"                  "Not used at this block size"
     if (inter_sweep_current > _threshold) {
-      ssize_t demand = prevSweep() - count + splitBirths() - splitDeaths();
+      ssize_t demand = prevSweep() - (ssize_t)count + splitBirths() - splitDeaths();
+      // XXX NEEDS TO BE FIXED
+      // assert(demand >= 0, "Demand should be non-negative");
+      // Defensive: adjust for imprecision in event counting
+      if (demand < 0) {
+        demand = 0;
+      }
+      float old_rate = _demand_rate_estimate.padded_average();
       float rate = ((float)demand)/inter_sweep_current;
       _demand_rate_estimate.sample(rate);
-      _desired = (ssize_t)(_demand_rate_estimate.padded_average()
-                           *inter_sweep_estimate);
+      float new_rate = _demand_rate_estimate.padded_average();
+      ssize_t old_desired = _desired;
+      _desired = (ssize_t)(new_rate * (inter_sweep_estimate
+                                       + (CMSExtrapolateSweep
+                                          ? intra_sweep_estimate
+                                          : 0.0)));
+      if (PrintFLSStatistics > 1) {
+        gclog_or_tty->print_cr("demand: %d, old_rate: %f, current_rate: %f, new_rate: %f, old_desired: %d, new_desired: %d",
+                                demand,     old_rate,     rate,             new_rate,     old_desired,     _desired);
+      }
     }
   }
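
The reworked compute_desired() above derives a demand rate per block size from the stock bookkeeping and extrapolates it over the expected inter-sweep interval, optionally extended by the expected sweep duration. A runnable sketch with made-up numbers; the padded-average smoothing of the rate is omitted and the flag is a plain bool here:

    #include <cstdio>

    int main() {
      // Stock bookkeeping for one block size, as in AllocationStats.
      long prev_sweep = 40, count = 30, split_births = 12, split_deaths = 2;
      float inter_sweep_current  = 4.0f;   // seconds since the last sweep ended
      float inter_sweep_estimate = 5.0f;   // padded average of that interval
      float intra_sweep_estimate = 1.0f;   // padded average of time spent sweeping
      bool  extrapolate_sweep    = true;   // stands in for CMSExtrapolateSweep

      long demand = prev_sweep - count + split_births - split_deaths;   // 20 blocks
      if (demand < 0) demand = 0;          // defensive, as in the patch
      float rate = (float)demand / inter_sweep_current;                 // 5 blocks/s

      // Extrapolate over the expected gap, optionally including the expected
      // sweep duration itself (note the parentheses around the conditional).
      float horizon = inter_sweep_estimate +
                      (extrapolate_sweep ? intra_sweep_estimate : 0.0f);
      long desired = (long)(rate * horizon);                            // 30 blocks

      printf("demand=%ld rate=%.1f desired=%ld\n", demand, rate, desired);
      return 0;
    }
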
 
--- a/src/share/vm/gc_implementation/shared/gcUtil.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/shared/gcUtil.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -52,11 +52,35 @@
   _last_sample = new_sample;
 }
 
+void AdaptiveWeightedAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptiveWeightedAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
+void AdaptivePaddedAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptivePaddedAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
+void AdaptivePaddedNoZeroDevAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptivePaddedNoZeroDevAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
 void AdaptivePaddedAverage::sample(float new_sample) {
-  // Compute our parent classes sample information
+  // Compute new adaptive weighted average based on new sample.
   AdaptiveWeightedAverage::sample(new_sample);
 
-  // Now compute the deviation and the new padded sample
+  // Now update the deviation and the padded average.
   float new_avg = average();
   float new_dev = compute_adaptive_average(fabsd(new_sample - new_avg),
                                            deviation());
--- a/src/share/vm/gc_implementation/shared/gcUtil.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_implementation/shared/gcUtil.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -54,8 +54,8 @@
 
  public:
   // Input weight must be between 0 and 100
-  AdaptiveWeightedAverage(unsigned weight) :
-    _average(0.0), _sample_count(0), _weight(weight), _last_sample(0.0) {
+  AdaptiveWeightedAverage(unsigned weight, float avg = 0.0) :
+    _average(avg), _sample_count(0), _weight(weight), _last_sample(0.0) {
   }
 
   void clear() {
@@ -64,6 +64,13 @@
     _last_sample = 0;
   }
 
+  // Useful for modifying static structures after startup.
+  void  modify(size_t avg, unsigned wt, bool force = false)  {
+    assert(force, "Are you sure you want to call this?");
+    _average = (float)avg;
+    _weight  = wt;
+  }
+
   // Accessors
   float    average() const       { return _average;       }
   unsigned weight()  const       { return _weight;        }
@@ -83,6 +90,10 @@
     // Convert to float and back to avoid integer overflow.
     return (size_t)exp_avg((float)avg, (float)sample, weight);
   }
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
 
 
@@ -129,6 +140,10 @@
 
   // Override
   void  sample(float new_sample);
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
 
 // A weighted average that includes a deviation from the average,
@@ -146,7 +161,12 @@
     AdaptivePaddedAverage(weight, padding)  {}
   // Override
   void  sample(float new_sample);
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
+
 // Use a least squares fit to a set of data to generate a linear
 // equation.
 //              y = intercept + slope * x
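
The printing hooks above attach to the two estimators this changeset leans on: an exponentially weighted average (the weight is the percentage given to a new sample) and a padded variant that adds a multiple of the running deviation so the estimate errs on the high side. A standalone sketch of both, with assumed constants and simplified field names:

    #include <cmath>
    #include <cstdio>
    #include <initializer_list>

    struct WeightedAverage {
      float average;
      unsigned weight;                          // 0..100
      explicit WeightedAverage(unsigned w, float avg = 0.0f)
          : average(avg), weight(w) {}
      void sample(float s) {
        average = (weight * s + (100 - weight) * average) / 100.0f;
      }
    };

    struct PaddedAverage : WeightedAverage {
      float deviation = 0.0f;
      unsigned padding;                         // multiple of the deviation to add
      PaddedAverage(unsigned w, unsigned pad) : WeightedAverage(w), padding(pad) {}
      void sample(float s) {
        WeightedAverage::sample(s);
        deviation = (weight * std::fabs(s - average) +
                     (100 - weight) * deviation) / 100.0f;
      }
      float padded_average() const { return average + padding * deviation; }
    };

    int main() {
      PaddedAverage sweep_estimate(/*weight=*/75, /*padding=*/1);
      for (float s : {4.0f, 5.0f, 3.5f, 6.0f}) sweep_estimate.sample(s);
      printf("padded average = %.2f\n", sweep_estimate.padded_average());
      return 0;
    }
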
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -59,8 +59,18 @@
                 PerfDataManager::create_string_variable(SUN_GC, "lastCause",
                              80, GCCause::to_string(_gc_lastcause), CHECK);
   }
+  _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below.
 }
 
+void CollectedHeap::pre_initialize() {
+  // Used for ReduceInitialCardMarks (when COMPILER2 is used);
+  // otherwise remains unused.
+#ifdef COMPILER2
+  _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store());
+#else
+  assert(_defer_initial_card_mark == false, "Who would set it?");
+#endif
+}
 
 #ifndef PRODUCT
 void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) {
@@ -140,12 +150,13 @@
 void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
   MemRegion deferred = thread->deferred_card_mark();
   if (!deferred.is_empty()) {
+    assert(_defer_initial_card_mark, "Otherwise should be empty");
     {
       // Verify that the storage points to a parsable object in heap
       DEBUG_ONLY(oop old_obj = oop(deferred.start());)
       assert(is_in(old_obj), "Not in allocated heap");
       assert(!can_elide_initializing_store_barrier(old_obj),
-             "Else should have been filtered in defer_store_barrier()");
+             "Else should have been filtered in new_store_pre_barrier()");
       assert(!is_in_permanent(old_obj), "Sanity: not expected");
       assert(old_obj->is_oop(true), "Not an oop");
       assert(old_obj->is_parsable(), "Will not be concurrently parsable");
@@ -174,9 +185,7 @@
 //     so long as the card-mark is completed before the next
 //     scavenge. For all these cases, we can do a card mark
 //     at the point at which we do a slow path allocation
-//     in the old gen. For uniformity, however, we end
-//     up using the same scheme (see below) for all three
-//     cases (deferring the card-mark appropriately).
+//     in the old gen, i.e. in this call.
 // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
 //     in addition that the card-mark for an old gen allocated
 //     object strictly follow any associated initializing stores.
@@ -199,12 +208,13 @@
 //     but, like in CMS, because of the presence of concurrent refinement
 //     (much like CMS' precleaning), must strictly follow the oop-store.
 //     Thus, using the same protocol for maintaining the intended
-//     invariants turns out, serendepitously, to be the same for all
-//     three collectors/heap types above.
+//     invariants turns out, serendipitously, to be the same for both
+//     G1 and CMS.
 //
-// For each future collector, this should be reexamined with
-// that specific collector in mind.
-oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+// For any future collector, this code should be reexamined with
+// that specific collector in mind, and the documentation above suitably
+// extended and updated.
+oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) {
   // If a previous card-mark was deferred, flush it now.
   flush_deferred_store_barrier(thread);
   if (can_elide_initializing_store_barrier(new_obj)) {
@@ -212,10 +222,17 @@
     // following the flush above.
     assert(thread->deferred_card_mark().is_empty(), "Error");
   } else {
-    // Remember info for the newly deferred store barrier
-    MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
-    assert(!deferred.is_empty(), "Error");
-    thread->set_deferred_card_mark(deferred);
+    MemRegion mr((HeapWord*)new_obj, new_obj->size());
+    assert(!mr.is_empty(), "Error");
+    if (_defer_initial_card_mark) {
+      // Defer the card mark
+      thread->set_deferred_card_mark(mr);
+    } else {
+      // Do the card mark
+      BarrierSet* bs = barrier_set();
+      assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+      bs->write_region(mr);
+    }
   }
   return new_obj;
 }
@@ -241,9 +258,9 @@
   assert(Universe::heap()->is_in_reserved(start + words - 1), "not in heap");
 }
 
-void CollectedHeap::zap_filler_array(HeapWord* start, size_t words)
+void CollectedHeap::zap_filler_array(HeapWord* start, size_t words, bool zap)
 {
-  if (ZapFillerObjects) {
+  if (ZapFillerObjects && zap) {
     Copy::fill_to_words(start + filler_array_hdr_size(),
                         words - filler_array_hdr_size(), 0XDEAFBABE);
   }
@@ -251,7 +268,7 @@
 #endif // ASSERT
 
 void
-CollectedHeap::fill_with_array(HeapWord* start, size_t words)
+CollectedHeap::fill_with_array(HeapWord* start, size_t words, bool zap)
 {
   assert(words >= filler_array_min_size(), "too small for an array");
   assert(words <= filler_array_max_size(), "too big for a single object");
@@ -262,16 +279,16 @@
   // Set the length first for concurrent GC.
   ((arrayOop)start)->set_length((int)len);
   post_allocation_setup_common(Universe::intArrayKlassObj(), start, words);
-  DEBUG_ONLY(zap_filler_array(start, words);)
+  DEBUG_ONLY(zap_filler_array(start, words, zap);)
 }
 
 void
-CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words)
+CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words, bool zap)
 {
   assert(words <= filler_array_max_size(), "too big for a single object");
 
   if (words >= filler_array_min_size()) {
-    fill_with_array(start, words);
+    fill_with_array(start, words, zap);
   } else if (words > 0) {
     assert(words == min_fill_size(), "unaligned size");
     post_allocation_setup_common(SystemDictionary::Object_klass(), start,
@@ -279,14 +296,14 @@
   }
 }
 
-void CollectedHeap::fill_with_object(HeapWord* start, size_t words)
+void CollectedHeap::fill_with_object(HeapWord* start, size_t words, bool zap)
 {
   DEBUG_ONLY(fill_args_check(start, words);)
   HandleMark hm;  // Free handles before leaving.
-  fill_with_object_impl(start, words);
+  fill_with_object_impl(start, words, zap);
 }
 
-void CollectedHeap::fill_with_objects(HeapWord* start, size_t words)
+void CollectedHeap::fill_with_objects(HeapWord* start, size_t words, bool zap)
 {
   DEBUG_ONLY(fill_args_check(start, words);)
   HandleMark hm;  // Free handles before leaving.
@@ -299,13 +316,13 @@
   const size_t max = filler_array_max_size();
   while (words > max) {
     const size_t cur = words - max >= min ? max : max - min;
-    fill_with_array(start, cur);
+    fill_with_array(start, cur, zap);
     start += cur;
     words -= cur;
   }
 #endif
 
-  fill_with_object_impl(start, words);
+  fill_with_object_impl(start, words, zap);
 }
 
 HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
@@ -313,22 +330,6 @@
   return NULL;
 }
 
-void CollectedHeap::fill_all_tlabs(bool retire) {
-  assert(UseTLAB, "should not reach here");
-  // See note in ensure_parsability() below.
-  assert(SafepointSynchronize::is_at_safepoint() ||
-         !is_init_completed(),
-         "should only fill tlabs at safepoint");
-  // The main thread starts allocating via a TLAB even before it
-  // has added itself to the threads list at vm boot-up.
-  assert(Threads::first() != NULL,
-         "Attempt to fill tlabs before main thread has been added"
-         " to threads list is doomed to failure!");
-  for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
-     thread->tlab().make_parsable(retire);
-  }
-}
-
 void CollectedHeap::ensure_parsability(bool retire_tlabs) {
   // The second disjunct in the assertion below makes a concession
   // for the start-up verification done while the VM is being
@@ -343,8 +344,24 @@
          "Should only be called at a safepoint or at start-up"
          " otherwise concurrent mutator activity may make heap "
          " unparsable again");
-  if (UseTLAB) {
-    fill_all_tlabs(retire_tlabs);
+  const bool use_tlab = UseTLAB;
+  const bool deferred = _defer_initial_card_mark;
+  // The main thread starts allocating via a TLAB even before it
+  // has added itself to the threads list at vm boot-up.
+  assert(!use_tlab || Threads::first() != NULL,
+         "Attempt to fill tlabs before main thread has been added"
+         " to threads list is doomed to failure!");
+  for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
+     if (use_tlab) thread->tlab().make_parsable(retire_tlabs);
+#ifdef COMPILER2
+     // The deferred store barriers must all have been flushed to the
+     // card-table (or other remembered set structure) before GC starts
+     // processing the card-table (or other remembered set).
+     if (deferred) flush_deferred_store_barrier(thread);
+#else
+     assert(!deferred, "Should be false");
+     assert(thread->deferred_card_mark().is_empty(), "Should be empty");
+#endif
   }
 }
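
The reworked new_store_pre_barrier()/ensure_parsability() path above either card-marks a slow-path-allocated old-gen object immediately or, when the heap says the mark must strictly follow the initializing stores, defers the mark and flushes it at the next safepoint. A self-contained sketch of that decision (hypothetical types, not the CollectedHeap interface):

    #include <cstddef>

    struct MemRegion {
      char*  start = nullptr;
      size_t bytes = 0;
      bool is_empty() const { return bytes == 0; }
    };

    struct ThreadState { MemRegion deferred_card_mark; };   // per JavaThread

    struct Heap {
      bool defer_initial_card_mark = false;      // decided once at pre_initialize() time
      void write_region(MemRegion) { /* dirty the cards covering the region */ }
      bool can_elide_initializing_store_barrier(char*) { return false; }  // assume old gen

      void flush_deferred(ThreadState& t) {      // cf. flush_deferred_store_barrier()
        if (!t.deferred_card_mark.is_empty()) {
          write_region(t.deferred_card_mark);
          t.deferred_card_mark = MemRegion();
        }
      }

      char* new_store_pre_barrier(ThreadState& t, char* obj, size_t size) {
        flush_deferred(t);                       // flush any earlier deferral first
        if (!can_elide_initializing_store_barrier(obj)) {
          MemRegion mr{obj, size};
          if (defer_initial_card_mark) {
            t.deferred_card_mark = mr;           // mark later, after the init stores
          } else {
            write_region(mr);                    // mark now; ordering doesn't matter
          }
        }
        return obj;
      }
    };

At a safepoint, ensure_parsability() walks the threads and performs the flush for each of them, which is what the COMPILER2 branch added above does.
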
 
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -51,6 +51,9 @@
   // Used for filler objects (static, but initialized in ctor).
   static size_t _filler_array_max_size;
 
+  // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used
+  bool _defer_initial_card_mark;
+
  protected:
   MemRegion _reserved;
   BarrierSet* _barrier_set;
@@ -70,13 +73,16 @@
   // Constructor
   CollectedHeap();
 
+  // Do common initializations that must follow instance construction,
+  // for example, those needing virtual calls.
+  // This code could perhaps be moved into initialize() but would
+  // be slightly more awkward because we want the latter to be a
+  // pure virtual.
+  void pre_initialize();
+
   // Create a new tlab
   virtual HeapWord* allocate_new_tlab(size_t size);
 
-  // Fix up tlabs to make the heap well-formed again,
-  // optionally retiring the tlabs.
-  virtual void fill_all_tlabs(bool retire);
-
   // Accumulate statistics on all tlabs.
   virtual void accumulate_statistics_all_tlabs();
 
@@ -127,14 +133,14 @@
   static inline size_t filler_array_max_size();
 
   DEBUG_ONLY(static void fill_args_check(HeapWord* start, size_t words);)
-  DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words);)
+  DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words, bool zap = true);)
 
   // Fill with a single array; caller must ensure filler_array_min_size() <=
   // words <= filler_array_max_size().
-  static inline void fill_with_array(HeapWord* start, size_t words);
+  static inline void fill_with_array(HeapWord* start, size_t words, bool zap = true);
 
   // Fill with a single object (either an int array or a java.lang.Object).
-  static inline void fill_with_object_impl(HeapWord* start, size_t words);
+  static inline void fill_with_object_impl(HeapWord* start, size_t words, bool zap = true);
 
   // Verification functions
   virtual void check_for_bad_heap_word_value(HeapWord* addr, size_t size)
@@ -338,14 +344,14 @@
     return size_t(align_object_size(oopDesc::header_size()));
   }
 
-  static void fill_with_objects(HeapWord* start, size_t words);
+  static void fill_with_objects(HeapWord* start, size_t words, bool zap = true);
 
-  static void fill_with_object(HeapWord* start, size_t words);
-  static void fill_with_object(MemRegion region) {
-    fill_with_object(region.start(), region.word_size());
+  static void fill_with_object(HeapWord* start, size_t words, bool zap = true);
+  static void fill_with_object(MemRegion region, bool zap = true) {
+    fill_with_object(region.start(), region.word_size(), zap);
   }
-  static void fill_with_object(HeapWord* start, HeapWord* end) {
-    fill_with_object(start, pointer_delta(end, start));
+  static void fill_with_object(HeapWord* start, HeapWord* end, bool zap = true) {
+    fill_with_object(start, pointer_delta(end, start), zap);
   }
 
   // Some heaps may offer a contiguous region for shared non-blocking
@@ -431,14 +437,25 @@
   // promises to call this function on such a slow-path-allocated
   // object before performing initializations that have elided
   // store barriers. Returns new_obj, or maybe a safer copy thereof.
-  virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+  virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj);
 
   // Answers whether an initializing store to a new object currently
-  // allocated at the given address doesn't need a (deferred) store
+  // allocated at the given address doesn't need a store
   // barrier. Returns "true" if it doesn't need an initializing
   // store barrier; answers "false" if it does.
   virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
 
+  // If a compiler is eliding store barriers for TLAB-allocated objects,
+  // we will be informed of a slow-path allocation by a call
+  // to new_store_pre_barrier() above. Such a call precedes the
+  // initialization of the object itself, and no post-store-barriers will
+  // be issued. Some heap types require that the barrier strictly follows
+  // the initializing stores. (This is currently implemented by deferring the
+  // barrier until the next slow-path allocation or gc-related safepoint.)
+  // This interface answers whether a particular heap type needs the card
+  // mark to be thus strictly sequenced after the stores.
+  virtual bool card_mark_must_follow_store() const = 0;
+
   // If the CollectedHeap was asked to defer a store barrier above,
   // this informs it to flush such a deferred store barrier to the
   // remembered set.
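
The deferral contract described above can be pictured with a hedged sketch of the flush step (an assumed shape only; the real flush_deferred_store_barrier() is not part of this hunk): the thread records the new object's region, and once its initializing stores are done the cards for that region are dirtied and the record cleared, which is why the GC-time assertion later in this changeset expects deferred_card_mark() to be empty.

// Sketch only: flushing a card mark that was deferred for a JavaThread.
void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
  MemRegion deferred = thread->deferred_card_mark();
  if (!deferred.is_empty()) {
    // The initializing stores have been issued; dirtying the cards now
    // satisfies heaps for which card_mark_must_follow_store() is true.
    barrier_set()->write_region(deferred);
    thread->set_deferred_card_mark(MemRegion());
  }
}
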
--- a/src/share/vm/includeDB_gc_parallel	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/includeDB_gc_parallel	Wed Jan 20 12:54:25 2010 -0800
@@ -21,6 +21,8 @@
 // have any questions.
 //  
 
+arguments.cpp                           compactibleFreeListSpace.hpp
+
 assembler_<arch>.cpp                    g1SATBCardTableModRefBS.hpp
 assembler_<arch>.cpp                    g1CollectedHeap.inline.hpp
 assembler_<arch>.cpp                    heapRegion.hpp
--- a/src/share/vm/memory/defNewGeneration.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/memory/defNewGeneration.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -609,7 +609,7 @@
 
     remove_forwarding_pointers();
     if (PrintGCDetails) {
-      gclog_or_tty->print(" (promotion failed)");
+      gclog_or_tty->print(" (promotion failed) ");
     }
     // Add to-space to the list of space to compact
     // when a promotion failure has occurred.  In that
@@ -620,6 +620,9 @@
     from()->set_next_compaction_space(to());
     gch->set_incremental_collection_will_fail();
 
+    // Inform the next generation that a promotion failure occurred.
+    _next_gen->promotion_failure_occurred();
+
     // Reset the PromotionFailureALot counters.
     NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
   }
@@ -679,6 +682,11 @@
 
 void DefNewGeneration::handle_promotion_failure(oop old) {
   preserve_mark_if_necessary(old, old->mark());
+  if (!_promotion_failed && PrintPromotionFailure) {
+    gclog_or_tty->print(" (promotion failure size = " SIZE_FORMAT ") ",
+                        old->size());
+  }
+
   // forward to self
   old->forward_to(old);
   _promotion_failed = true;
--- a/src/share/vm/memory/genCollectedHeap.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint GenCollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   int i;
   _n_gens = gen_policy()->number_of_generations();
 
@@ -129,6 +131,7 @@
 
   _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions);
   set_barrier_set(rem_set()->bs());
+
   _gch = this;
 
   for (i = 0; i < _n_gens; i++) {
--- a/src/share/vm/memory/genCollectedHeap.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -260,6 +260,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return UseConcMarkSweepGC;
+  }
+
   // We don't need barriers for stores to objects in the
   // young gen and, a fortiori, for initializing stores to
   // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
--- a/src/share/vm/memory/generation.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/memory/generation.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -181,6 +181,12 @@
   virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
     bool younger_handles_promotion_failure) const;
 
+  // For a non-young generation, this interface can be used to inform a
+  // generation that a promotion attempt into that generation failed.
+  // Typically used to enable diagnostic output for post-mortem analysis,
+  // but other uses of the interface are not ruled out.
+  virtual void promotion_failure_occurred() { /* does nothing */ }
+
   // Return an estimate of the maximum allocation that could be performed
   // in the generation without triggering any collection or expansion
   // activity.  It is "unsafe" because no locks are taken; the result
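
As an illustration of the hook (a hypothetical subclass, not code from this changeset), an old generation could override it to emit the kind of diagnostics governed by the CMSDumpAtPromotionFailure flag added further down:

// Sketch: how a hypothetical old generation ("SomeOldGeneration") might
// override the notification to emit post-mortem diagnostics.
void SomeOldGeneration::promotion_failure_occurred() {
  if (CMSDumpAtPromotionFailure) {
    gclog_or_tty->print_cr("Old gen state dump after promotion failure:");
    // Detail could be keyed off CMSPrintChunksInDump / CMSPrintObjectsInDump;
    // generation-specific dumping would go here.
  }
}
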
--- a/src/share/vm/memory/threadLocalAllocBuffer.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -100,7 +100,7 @@
 void ThreadLocalAllocBuffer::make_parsable(bool retire) {
   if (end() != NULL) {
     invariants();
-    CollectedHeap::fill_with_object(top(), hard_end());
+    CollectedHeap::fill_with_object(top(), hard_end(), retire);
 
     if (retire || ZeroTLAB) {  // "Reset" the TLAB
       set_start(NULL);
--- a/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,8 +27,13 @@
   HeapWord* obj = top();
   if (pointer_delta(end(), obj) >= size) {
     // successful thread-local allocation
-
-    DEBUG_ONLY(Copy::fill_to_words(obj, size, badHeapWordVal));
+#ifdef ASSERT
+    // Skip mangling the space corresponding to the object header to
+    // ensure that the returned space is not considered parsable by
+    // any concurrent GC thread.
+    size_t hdr_size = CollectedHeap::min_fill_size();
+    Copy::fill_to_words(obj + hdr_size, size - hdr_size, badHeapWordVal);
+#endif // ASSERT
     // This addition is safe because we know that top is
     // at least size below end, so the add can't wrap.
     set_top(obj + size);
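
A short worked example of the change just above (numbers are illustrative): with min_fill_size() of 2 words and a 6-word request, only the 4 payload words receive badHeapWordVal; the 2 header words stay unmangled, so a concurrent GC thread cannot mistake the mangled pattern for a valid object header. A guarded helper in the same spirit (a sketch, not changeset code) would be:

#ifdef ASSERT
// Sketch: mangle only the payload of a newly carved-out TLAB allocation.
static void mangle_allocation_payload(HeapWord* obj, size_t size) {
  size_t hdr_size = CollectedHeap::min_fill_size();
  if (size > hdr_size) {  // defensive: nothing to mangle for minimal objects
    Copy::fill_to_words(obj + hdr_size, size - hdr_size, badHeapWordVal);
  }
}
#endif // ASSERT
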
--- a/src/share/vm/opto/graphKit.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/opto/graphKit.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -3259,9 +3259,10 @@
   if (use_ReduceInitialCardMarks()
       && obj == just_allocated_object(control())) {
     // We can skip marks on a freshly-allocated object in Eden.
-    // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
-    // That routine informs GC to take appropriate compensating steps
-    // so as to make this card-mark elision safe.
+    // Keep this code in sync with new_store_pre_barrier() in runtime.cpp.
+    // That routine informs GC to take appropriate compensating steps,
+    // upon a slow-path allocation, so as to make this card-mark
+    // elision safe.
     return;
   }
 
--- a/src/share/vm/opto/runtime.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/opto/runtime.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -143,7 +143,7 @@
 // We failed the fast-path allocation.  Now we need to do a scavenge or GC
 // and try allocation again.
 
-void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
+void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
   // we inform the GC that we will be doing initializing writes to
   // this object in the future without emitting card-marks, so
@@ -156,7 +156,7 @@
   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
   // GC may decide to give back a safer copy of new_obj.
-  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
+  new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }
 
@@ -200,7 +200,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
@@ -239,7 +239,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
--- a/src/share/vm/opto/runtime.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/opto/runtime.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -133,8 +133,9 @@
   // Allocate storage for a objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
-  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
-  static void maybe_defer_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation, pre-initializing-stores step for
+  // implementing ReduceInitialCardMarks
+  static void new_store_pre_barrier(JavaThread* thread);
 
   // Allocate storage for a multi-dimensional arrays
   // Note: needs to be fixed for arbitrary number of dimensions
--- a/src/share/vm/prims/jvmtiEnv.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -592,7 +592,6 @@
     break;
   case JVMTI_VERBOSE_GC:
     PrintGC = value != 0;
-    TraceClassUnloading = value != 0;
     break;
   case JVMTI_VERBOSE_JNI:
     PrintJNIResolving = value != 0;
--- a/src/share/vm/runtime/arguments.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -948,6 +948,7 @@
   }
 }
 
+#ifndef KERNEL
 // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
 // if it's not explictly set or unset. If the user has chosen
 // UseParNewGC and not explicitly set ParallelGCThreads we
@@ -1177,8 +1178,7 @@
       // the value (either from the command line or ergonomics) of
       // OldPLABSize.  Following OldPLABSize is an ergonomics decision.
       FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, OldPLABSize);
-    }
-    else {
+    } else {
       // OldPLABSize and CMSParPromoteBlocksToClaim are both set.
       // CMSParPromoteBlocksToClaim is a collector-specific flag, so
       // we'll let it to take precedence.
@@ -1188,7 +1188,23 @@
                   " CMSParPromoteBlocksToClaim will take precedence.\n");
     }
   }
+  if (!FLAG_IS_DEFAULT(ResizeOldPLAB) && !ResizeOldPLAB) {
+    // OldPLAB sizing manually turned off: Use a larger default setting,
+    // unless it was manually specified. This is because a too-low value
+    // will slow down scavenges.
+    if (FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim)) {
+      FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, 50); // default value before 6631166
+    }
+  }
+  // Overwrite OldPLABSize, which is the variable we will use internally everywhere.
+  FLAG_SET_ERGO(uintx, OldPLABSize, CMSParPromoteBlocksToClaim);
+  // If either of the static initialization defaults has changed, note this
+  // modification.
+  if (!FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim) || !FLAG_IS_DEFAULT(OldPLABWeight)) {
+    CFLS_LAB::modify_initialization(OldPLABSize, OldPLABWeight);
+  }
 }
+#endif // KERNEL
 
 inline uintx max_heap_for_compressed_oops() {
   LP64_ONLY(return oopDesc::OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
@@ -1850,7 +1866,6 @@
         FLAG_SET_CMDLINE(bool, TraceClassUnloading, true);
       } else if (!strcmp(tail, ":gc")) {
         FLAG_SET_CMDLINE(bool, PrintGC, true);
-        FLAG_SET_CMDLINE(bool, TraceClassUnloading, true);
       } else if (!strcmp(tail, ":jni")) {
         FLAG_SET_CMDLINE(bool, PrintJNIResolving, true);
       }
@@ -2370,22 +2385,25 @@
                   "ExtendedDTraceProbes flag is only applicable on Solaris\n");
       return JNI_EINVAL;
 #endif // ndef SOLARIS
-    } else
 #ifdef ASSERT
-    if (match_option(option, "-XX:+FullGCALot", &tail)) {
+    } else if (match_option(option, "-XX:+FullGCALot", &tail)) {
       FLAG_SET_CMDLINE(bool, FullGCALot, true);
       // disable scavenge before parallel mark-compact
       FLAG_SET_CMDLINE(bool, ScavengeBeforeFullGC, false);
-    } else
 #endif
-    if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
+    } else if (match_option(option, "-XX:CMSParPromoteBlocksToClaim=", &tail)) {
       julong cms_blocks_to_claim = (julong)atol(tail);
       FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
       jio_fprintf(defaultStream::error_stream(),
-        "Please use -XX:CMSParPromoteBlocksToClaim in place of "
+        "Please use -XX:OldPLABSize in place of "
+        "-XX:CMSParPromoteBlocksToClaim in the future\n");
+    } else if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
+      julong cms_blocks_to_claim = (julong)atol(tail);
+      FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
+      jio_fprintf(defaultStream::error_stream(),
+        "Please use -XX:OldPLABSize in place of "
         "-XX:ParCMSPromoteBlocksToClaim in the future\n");
-    } else
-    if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
+    } else if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
       julong old_plab_size = 0;
       ArgsRange errcode = parse_memory_size(tail, &old_plab_size, 1);
       if (errcode != arg_in_range) {
@@ -2398,8 +2416,7 @@
       jio_fprintf(defaultStream::error_stream(),
                   "Please use -XX:OldPLABSize in place of "
                   "-XX:ParallelGCOldGenAllocBufferSize in the future\n");
-    } else
-    if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
+    } else if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
       julong young_plab_size = 0;
       ArgsRange errcode = parse_memory_size(tail, &young_plab_size, 1);
       if (errcode != arg_in_range) {
@@ -2412,8 +2429,7 @@
       jio_fprintf(defaultStream::error_stream(),
                   "Please use -XX:YoungPLABSize in place of "
                   "-XX:ParallelGCToSpaceAllocBufferSize in the future\n");
-    } else
-    if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
+    } else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
       // Skip -XX:Flags= since that case has already been handled
       if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) {
         if (!process_argument(tail, args->ignoreUnrecognized, origin)) {
@@ -2716,9 +2732,6 @@
   if (PrintGCDetails) {
     // Turn on -verbose:gc options as well
     PrintGC = true;
-    if (FLAG_IS_DEFAULT(TraceClassUnloading)) {
-      TraceClassUnloading = true;
-    }
   }
 
 #if defined(_LP64) && defined(COMPILER1)
@@ -2749,6 +2762,7 @@
     return JNI_EINVAL;
   }
 
+#ifndef KERNEL
   if (UseConcMarkSweepGC) {
     // Set flags for CMS and ParNew.  Check UseConcMarkSweep first
     // to ensure that when both UseConcMarkSweepGC and UseParNewGC
@@ -2766,6 +2780,7 @@
       set_g1_gc_flags();
     }
   }
+#endif // KERNEL
 
 #ifdef SERIALGC
   assert(verify_serial_gc_flags(), "SerialGC unset");
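
The deprecated-flag branches above (for example -XX:ParCMSPromoteBlocksToClaim= and -XX:ParallelGCOldGenAllocBufferSize=) all follow the same alias-and-warn pattern; a stand-alone sketch of that pattern (a hypothetical helper, not HotSpot's match_option machinery) looks like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Sketch: recognize a deprecated "-XX:OldName=<n>" option, parse its value,
// and steer the user toward the replacement flag.
static bool handle_deprecated_option(const char* option,
                                     const char* deprecated_prefix,
                                     const char* replacement_name,
                                     unsigned long* value_out) {
  size_t len = strlen(deprecated_prefix);
  if (strncmp(option, deprecated_prefix, len) != 0) {
    return false;  // not this option; let other handlers try
  }
  *value_out = strtoul(option + len, NULL, 10);
  fprintf(stderr, "Please use %s in place of %s in the future\n",
          replacement_name, deprecated_prefix);
  return true;
}
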
--- a/src/share/vm/runtime/globals.hpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/runtime/globals.hpp	Wed Jan 20 12:54:25 2010 -0800
@@ -1352,10 +1352,46 @@
   product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
-  product(uintx, CMSParPromoteBlocksToClaim, 50,                            \
+  product(uintx, CMSParPromoteBlocksToClaim, 16,                            \
           "Number of blocks to attempt to claim when refilling CMS LAB for "\
           "parallel GC.")                                                   \
                                                                             \
+  product(uintx, OldPLABWeight, 50,                                         \
+          "Percentage (0-100) used to weight the current sample when "      \
+          "computing exponentially decaying average for resizing CMSParPromoteBlocksToClaim.") \
+                                                                            \
+  product(bool, ResizeOldPLAB, true,                                        \
+          "Dynamically resize (old gen) promotion labs")                    \
+                                                                            \
+  product(bool, PrintOldPLAB, false,                                        \
+          "Print (old gen) promotion labs sizing decisions")                \
+                                                                            \
+  product(uintx, CMSOldPLABMin, 16,                                         \
+          "Min size of CMS gen promotion lab caches per worker per blksize")\
+                                                                            \
+  product(uintx, CMSOldPLABMax, 1024,                                       \
+          "Max size of CMS gen promotion lab caches per worker per blksize")\
+                                                                            \
+  product(uintx, CMSOldPLABNumRefills, 4,                                   \
+          "Nominal number of refills of CMS gen promotion lab cache"        \
+          " per worker per block size")                                     \
+                                                                            \
+  product(bool, CMSOldPLABResizeQuicker, false,                             \
+          "Whether to react on-the-fly during a scavenge to a sudden"       \
+          " change in block demand rate")                                   \
+                                                                            \
+  product(uintx, CMSOldPLABToleranceFactor, 4,                              \
+          "The tolerance of the phase-change detector for on-the-fly"       \
+          " PLAB resizing during a scavenge")                               \
+                                                                            \
+  product(uintx, CMSOldPLABReactivityFactor, 2,                             \
+          "The gain in the feedback loop for on-the-fly PLAB resizing"      \
+          " during a scavenge")                                             \
+                                                                            \
+  product(uintx, CMSOldPLABReactivityCeiling, 10,                           \
+          "The clamping of the gain in the feedback loop for on-the-fly"    \
+          " PLAB resizing during a scavenge")                               \
+                                                                            \
   product(bool, AlwaysPreTouch, false,                                      \
           "It forces all freshly committed pages to be pre-touched.")       \
                                                                             \
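
Several of the flags just added (OldPLABWeight here, CMSExpAvgFactor and CMS_FLSWeight in the next hunk) are percentage weights for exponentially decaying averages. A minimal sketch of that computation (illustrative only, not the VM's own averaging utility): with a weight of 50, a previous average of 32 and a new sample of 64, the result is (50*32 + 50*64)/100 = 48.

// Sketch: exponentially decaying average where weight_pct (0-100) is the
// weight given to the current sample, as the flag descriptions above state.
static double decaying_average(double old_avg, double sample, unsigned weight_pct) {
  return ((100.0 - weight_pct) * old_avg + weight_pct * sample) / 100.0;
}
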
@@ -1397,27 +1433,54 @@
           "Percentage (0-100) by which the CMS incremental mode duty cycle" \
           " is shifted to the right within the period between young GCs")   \
                                                                             \
-  product(uintx, CMSExpAvgFactor, 25,                                       \
-          "Percentage (0-100) used to weight the current sample when "      \
-          "computing exponential averages for CMS statistics")              \
-                                                                            \
-  product(uintx, CMS_FLSWeight, 50,                                         \
-          "Percentage (0-100) used to weight the current sample when "      \
-          "computing exponentially decating averages for CMS FLS statistics") \
-                                                                            \
-  product(uintx, CMS_FLSPadding, 2,                                         \
-          "The multiple of deviation from mean to use for buffering "       \
+  product(uintx, CMSExpAvgFactor, 50,                                       \
+          "Percentage (0-100) used to weight the current sample when "      \
+          "computing exponential averages for CMS statistics.")             \
+                                                                            \
+  product(uintx, CMS_FLSWeight, 75,                                         \
+          "Percentage (0-100) used to weight the current sample when "      \
+          "computing exponentially decaying averages for CMS FLS statistics.") \
+                                                                            \
+  product(uintx, CMS_FLSPadding, 1,                                         \
+          "The multiple of deviation from mean to use for buffering "       \
           "against volatility in free list demand.")                        \
                                                                             \
   product(uintx, FLSCoalescePolicy, 2,                                      \
           "CMS: Aggression level for coalescing, increasing from 0 to 4")   \
                                                                             \
-  product(uintx, CMS_SweepWeight, 50,                                       \
+  product(bool, FLSAlwaysCoalesceLarge, false,                              \
+          "CMS: Larger free blocks are always available for coalescing")    \
+                                                                            \
+  product(double, FLSLargestBlockCoalesceProximity, 0.99,                   \
+          "CMS: the smaller the percentage the greater the coalescing force")\
+                                                                            \
+  product(double, CMSSmallCoalSurplusPercent, 1.05,                         \
+          "CMS: the factor by which to inflate estimated demand of small"   \
+          " block sizes to prevent coalescing with an adjoining block")     \
+                                                                            \
+  product(double, CMSLargeCoalSurplusPercent, 0.95,                         \
+          "CMS: the factor by which to inflate estimated demand of large"   \
+          " block sizes to prevent coalescing with an adjoining block")     \
+                                                                            \
+  product(double, CMSSmallSplitSurplusPercent, 1.10,                        \
+          "CMS: the factor by which to inflate estimated demand of small"   \
+          " block sizes to prevent splitting to supply demand for smaller"  \
+          " blocks")                                                        \
+                                                                            \
+  product(double, CMSLargeSplitSurplusPercent, 1.00,                        \
+          "CMS: the factor by which to inflate estimated demand of large"   \
+          " block sizes to prevent splitting to supply demand for smaller"  \
+          " blocks")                                                        \
+                                                                            \
+  product(bool, CMSExtrapolateSweep, false,                                 \
+          "CMS: cushion for block demand during sweep")                     \
+                                                                            \
+  product(uintx, CMS_SweepWeight, 75,                                       \
           "Percentage (0-100) used to weight the current sample when "      \
           "computing exponentially decaying average for inter-sweep "       \
           "duration")                                                       \
                                                                             \
-  product(uintx, CMS_SweepPadding, 2,                                       \
+  product(uintx, CMS_SweepPadding, 1,                                       \
           "The multiple of deviation from mean to use for buffering "       \
           "against volatility in inter-sweep duration.")                    \
                                                                             \
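
To make the coalescing knobs above concrete, here is a hedged sketch (an interpretation of the flag descriptions only, not the dictionary's actual code) of two decisions they influence: whether a block is close enough to the largest free block to always coalesce, and whether estimated demand, once inflated by a surplus factor, argues against coalescing:

// Sketch only: illustrative use of the new coalescing-policy flags.
static bool near_largest_chunk(size_t size, size_t largest_size) {
  // FLSLargestBlockCoalesceProximity is a fraction in (0,1]; the smaller it
  // is, the more sizes count as "near the largest" and get coalesced.
  return (double)size >= FLSLargestBlockCoalesceProximity * (double)largest_size;
}

static bool demand_discourages_coalescing(double estimated_demand,
                                          double current_surplus,
                                          bool is_small_block) {
  // Inflate estimated demand so borderline cases keep blocks available
  // rather than coalescing them away.
  double factor = is_small_block ? CMSSmallCoalSurplusPercent
                                 : CMSLargeCoalSurplusPercent;
  return estimated_demand * factor > current_surplus;
}
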
@@ -1456,6 +1519,13 @@
   product(uintx, CMSIndexedFreeListReplenish, 4,                            \
           "Replenish and indexed free list with this number of chunks")     \
                                                                             \
+  product(bool, CMSReplenishIntermediate, true,                             \
+          "Replenish all intermediate free-list caches")                    \
+                                                                            \
+  product(bool, CMSSplitIndexedFreeListBlocks, true,                        \
+          "When satisfying batched demand, split blocks from the "          \
+          "IndexedFreeList whose size is a multiple of requested size")     \
+                                                                            \
   product(bool, CMSLoopWarn, false,                                         \
           "Warn in case of excessive CMS looping")                          \
                                                                             \
@@ -1590,6 +1660,18 @@
           "Bitmap operations should process at most this many bits"         \
           "between yields")                                                 \
                                                                             \
+  product(bool, CMSDumpAtPromotionFailure, false,                           \
+          "Dump useful information about the state of the CMS old "         \
+          "generation upon a promotion failure.")                           \
+                                                                            \
+  product(bool, CMSPrintChunksInDump, false,                                \
+          "In a dump enabled by CMSDumpAtPromotionFailure, include "        \
+          "more detailed information about the free chunks.")               \
+                                                                            \
+  product(bool, CMSPrintObjectsInDump, false,                               \
+          "In a dump enabled by CMSDumpAtPromotionFailure, include "        \
+          "more detailed information about the allocated objects.")         \
+                                                                            \
   diagnostic(bool, FLSVerifyAllHeapReferences, false,                       \
           "Verify that all refs across the FLS boundary "                   \
           " are to valid objects")                                          \
@@ -1674,6 +1756,10 @@
           "The youngest generation collection does not require "            \
           "a guarantee of full promotion of all live objects.")             \
                                                                             \
+  product(bool, PrintPromotionFailure, false,                               \
+          "Print additional diagnostic information following "              \
+          "promotion failure")                                              \
+                                                                            \
   notproduct(bool, PromotionFailureALot, false,                             \
           "Use promotion failure handling on every youngest generation "    \
           "collection")                                                     \
@@ -1926,6 +2012,10 @@
   diagnostic(bool, GCParallelVerificationEnabled, true,                     \
           "Enable parallel memory system verification")                     \
                                                                             \
+  diagnostic(bool, DeferInitialCardMark, false,                             \
+          "When +ReduceInitialCardMarks, explicitly defer any that "        \
+          "may arise from new_store_pre_barrier")                           \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                      \
                                                                             \
--- a/src/share/vm/runtime/thread.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/runtime/thread.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -2357,9 +2357,8 @@
 };
 
 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  // Flush deferred store-barriers, if any, associated with
-  // initializing stores done by this JavaThread in the current epoch.
-  Universe::heap()->flush_deferred_store_barrier(this);
+  // Verify that the deferred card marks have been flushed.
+  assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
--- a/src/share/vm/runtime/vmStructs.cpp	Tue Jan 19 15:54:42 2010 -0800
+++ b/src/share/vm/runtime/vmStructs.cpp	Wed Jan 20 12:54:25 2010 -0800
@@ -309,6 +309,7 @@
   nonstatic_field(CollectedHeap,               _reserved,                                     MemRegion)                             \
   nonstatic_field(SharedHeap,                  _perm_gen,                                     PermGen*)                              \
   nonstatic_field(CollectedHeap,               _barrier_set,                                  BarrierSet*)                           \
+  nonstatic_field(CollectedHeap,               _defer_initial_card_mark,                      bool)                                  \
   nonstatic_field(CollectedHeap,               _is_gc_active,                                 bool)                                  \
   nonstatic_field(CompactibleSpace,            _compaction_top,                               HeapWord*)                             \
   nonstatic_field(CompactibleSpace,            _first_dead,                                   HeapWord*)                             \