# HG changeset patch
# User trims
# Date 1263939798 28800
# Node ID 5b00c9feb9eab81999dc1c97a1e2a2a88316afba
# Parent  3003ddd1d4330b06cb4691ae74d600d3685899eb# Parent  c4d722788ed65bd3199b6b3bf50b2a596cb8fd6a
Merge

diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -1441,6 +1441,7 @@
 }
 
 jint G1CollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
   os::enable_vtime();
 
   // Necessary to satisfy locking discipline assertions.
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 19 14:23:18 2010 -0800
@@ -1007,6 +1007,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return true;
+  }
+
   bool is_in_young(oop obj) {
     HeapRegion* hr = heap_region_containing(obj);
     return hr != NULL && hr->is_young();
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_implementation/g1/ptrQueue.cpp
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -73,7 +73,12 @@
 
 void PtrQueue::locking_enqueue_completed_buffer(void** buf) {
   assert(_lock->owned_by_self(), "Required.");
+
+  // We have to unlock _lock (which may be Shared_DirtyCardQ_lock) before
+  // we acquire DirtyCardQ_CBL_mon inside enqeue_complete_buffer as they
+  // have the same rank and we may get the "possible deadlock" message
   _lock->unlock();
+
   qset()->enqueue_complete_buffer(buf);
   // We must relock only because the caller will unlock, for the normal
   // case.
@@ -140,7 +145,36 @@
   // holding the lock if there is one).
   if (_buf != NULL) {
     if (_lock) {
-      locking_enqueue_completed_buffer(_buf);
+      assert(_lock->owned_by_self(), "Required.");
+
+      // The current PtrQ may be the shared dirty card queue and
+      // may be being manipulated by more than one worker thread
+      // during a pause. Since the enqueuing of the completed
+      // buffer unlocks the Shared_DirtyCardQ_lock more than one
+      // worker thread can 'race' on reading the shared queue attributes
+      // (_buf and _index) and multiple threads can call into this
+      // routine for the same buffer. This will cause the completed
+      // buffer to be added to the CBL multiple times.
+
+      // We "claim" the current buffer by caching value of _buf in
+      // a local and clearing the field while holding _lock. When
+      // _lock is released (while enqueueing the completed buffer)
+      // the thread that acquires _lock will skip this code,
+      // preventing the subsequent the multiple enqueue, and
+      // install a newly allocated buffer below.
+
+      void** buf = _buf;   // local pointer to completed buffer
+      _buf = NULL;         // clear shared _buf field
+
+      locking_enqueue_completed_buffer(buf);  // enqueue completed buffer
+
+      // While the current thread was enqueuing the buffer another thread
+      // may have a allocated a new buffer and inserted it into this pointer
+      // queue. If that happens then we just return so that the current
+      // thread doesn't overwrite the buffer allocated by the other thread
+      // and potentially losing some dirtied cards.
+
+      if (_buf != NULL) return;
     } else {
       if (qset()->process_or_enqueue_complete_buffer(_buf)) {
         // Recycle the buffer. No allocation.
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp
--- a/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-/*
- * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- */
-
-void PtrQueue::handle_zero_index() {
-  assert(0 == _index, "Precondition.");
-  // This thread records the full buffer and allocates a new one (while
-  // holding the lock if there is one).
-  void** buf = _buf;
-  _buf = qset()->allocate_buffer();
-  _sz = qset()->buffer_size();
-  _index = _sz;
-  assert(0 <= _index && _index <= _sz, "Invariant.");
-  if (buf != NULL) {
-    if (_lock) {
-      locking_enqueue_completed_buffer(buf);
-    } else {
-      qset()->enqueue_complete_buffer(buf);
-    }
-  }
-}
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint ParallelScavengeHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   // Cannot be initialized until after the flags are parsed
   GenerationSizer flag_parser;
 
@@ -717,10 +719,6 @@
   return young_gen()->allocate(size, true);
 }
 
-void ParallelScavengeHeap::fill_all_tlabs(bool retire) {
-  CollectedHeap::fill_all_tlabs(retire);
-}
-
 void ParallelScavengeHeap::accumulate_statistics_all_tlabs() {
   CollectedHeap::accumulate_statistics_all_tlabs();
 }
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Jan 19 14:23:18 2010 -0800
@@ -54,7 +54,6 @@
  protected:
   static inline size_t total_invocations();
   HeapWord* allocate_new_tlab(size_t size);
-  void fill_all_tlabs(bool retire);
 
  public:
   ParallelScavengeHeap() : CollectedHeap() {
@@ -191,6 +190,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return false;
+  }
+
   // Return true if we don't we need a store barrier for
   // initializing stores to an object at this address.
   virtual bool can_elide_initializing_store_barrier(oop new_obj);
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_interface/collectedHeap.cpp
--- a/src/share/vm/gc_interface/collectedHeap.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -59,8 +59,18 @@
                 PerfDataManager::create_string_variable(SUN_GC, "lastCause",
                              80, GCCause::to_string(_gc_lastcause), CHECK);
   }
+  _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below.
 }
 
+void CollectedHeap::pre_initialize() {
+  // Used for ReduceInitialCardMarks (when COMPILER2 is used);
+  // otherwise remains unused.
+#ifdef COMPLER2
+  _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store());
+#else
+  assert(_defer_initial_card_mark == false, "Who would set it?");
+#endif
+}
 
 #ifndef PRODUCT
 void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) {
@@ -140,12 +150,13 @@
 void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
   MemRegion deferred = thread->deferred_card_mark();
   if (!deferred.is_empty()) {
+    assert(_defer_initial_card_mark, "Otherwise should be empty");
     {
       // Verify that the storage points to a parsable object in heap
       DEBUG_ONLY(oop old_obj = oop(deferred.start());)
       assert(is_in(old_obj), "Not in allocated heap");
       assert(!can_elide_initializing_store_barrier(old_obj),
-             "Else should have been filtered in defer_store_barrier()");
+             "Else should have been filtered in new_store_pre_barrier()");
       assert(!is_in_permanent(old_obj), "Sanity: not expected");
       assert(old_obj->is_oop(true), "Not an oop");
       assert(old_obj->is_parsable(), "Will not be concurrently parsable");
@@ -174,9 +185,7 @@
 //     so long as the card-mark is completed before the next
 //     scavenge. For all these cases, we can do a card mark
 //     at the point at which we do a slow path allocation
-//     in the old gen. For uniformity, however, we end
-//     up using the same scheme (see below) for all three
-//     cases (deferring the card-mark appropriately).
+//     in the old gen, i.e. in this call.
 // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
 //     in addition that the card-mark for an old gen allocated
 //     object strictly follow any associated initializing stores.
@@ -199,12 +208,13 @@
 //     but, like in CMS, because of the presence of concurrent refinement
 //     (much like CMS' precleaning), must strictly follow the oop-store.
 //     Thus, using the same protocol for maintaining the intended
-//     invariants turns out, serendepitously, to be the same for all
-//     three collectors/heap types above.
+//     invariants turns out, serendepitously, to be the same for both
+//     G1 and CMS.
 //
-// For each future collector, this should be reexamined with
-// that specific collector in mind.
-oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+// For any future collector, this code should be reexamined with
+// that specific collector in mind, and the documentation above suitably
+// extended and updated.
+oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) {
   // If a previous card-mark was deferred, flush it now.
   flush_deferred_store_barrier(thread);
   if (can_elide_initializing_store_barrier(new_obj)) {
@@ -212,10 +222,17 @@
     // following the flush above.
     assert(thread->deferred_card_mark().is_empty(), "Error");
   } else {
-    // Remember info for the newly deferred store barrier
-    MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
-    assert(!deferred.is_empty(), "Error");
-    thread->set_deferred_card_mark(deferred);
+    MemRegion mr((HeapWord*)new_obj, new_obj->size());
+    assert(!mr.is_empty(), "Error");
+    if (_defer_initial_card_mark) {
+      // Defer the card mark
+      thread->set_deferred_card_mark(mr);
+    } else {
+      // Do the card mark
+      BarrierSet* bs = barrier_set();
+      assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+      bs->write_region(mr);
+    }
   }
   return new_obj;
 }
@@ -241,9 +258,9 @@
   assert(Universe::heap()->is_in_reserved(start + words - 1), "not in heap");
 }
 
-void CollectedHeap::zap_filler_array(HeapWord* start, size_t words)
+void CollectedHeap::zap_filler_array(HeapWord* start, size_t words, bool zap)
 {
-  if (ZapFillerObjects) {
+  if (ZapFillerObjects && zap) {
     Copy::fill_to_words(start + filler_array_hdr_size(),
                         words - filler_array_hdr_size(), 0XDEAFBABE);
   }
@@ -251,7 +268,7 @@
 #endif // ASSERT
 
 void
-CollectedHeap::fill_with_array(HeapWord* start, size_t words)
+CollectedHeap::fill_with_array(HeapWord* start, size_t words, bool zap)
 {
   assert(words >= filler_array_min_size(), "too small for an array");
   assert(words <= filler_array_max_size(), "too big for a single object");
@@ -262,16 +279,16 @@
   // Set the length first for concurrent GC.
   ((arrayOop)start)->set_length((int)len);
   post_allocation_setup_common(Universe::intArrayKlassObj(), start, words);
-  DEBUG_ONLY(zap_filler_array(start, words);)
+  DEBUG_ONLY(zap_filler_array(start, words, zap);)
 }
 
 void
-CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words)
+CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words, bool zap)
 {
   assert(words <= filler_array_max_size(), "too big for a single object");
 
   if (words >= filler_array_min_size()) {
-    fill_with_array(start, words);
+    fill_with_array(start, words, zap);
   } else if (words > 0) {
     assert(words == min_fill_size(), "unaligned size");
     post_allocation_setup_common(SystemDictionary::Object_klass(), start,
@@ -279,14 +296,14 @@
   }
 }
 
-void CollectedHeap::fill_with_object(HeapWord* start, size_t words)
+void CollectedHeap::fill_with_object(HeapWord* start, size_t words, bool zap)
 {
   DEBUG_ONLY(fill_args_check(start, words);)
   HandleMark hm;  // Free handles before leaving.
-  fill_with_object_impl(start, words);
+  fill_with_object_impl(start, words, zap);
 }
 
-void CollectedHeap::fill_with_objects(HeapWord* start, size_t words)
+void CollectedHeap::fill_with_objects(HeapWord* start, size_t words, bool zap)
 {
   DEBUG_ONLY(fill_args_check(start, words);)
   HandleMark hm;  // Free handles before leaving.
@@ -299,13 +316,13 @@
   const size_t max = filler_array_max_size();
   while (words > max) {
     const size_t cur = words - max >= min ? max : max - min;
-    fill_with_array(start, cur);
+    fill_with_array(start, cur, zap);
     start += cur;
     words -= cur;
   }
 #endif
 
-  fill_with_object_impl(start, words);
+  fill_with_object_impl(start, words, zap);
 }
 
 HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
@@ -313,22 +330,6 @@
   return NULL;
 }
 
-void CollectedHeap::fill_all_tlabs(bool retire) {
-  assert(UseTLAB, "should not reach here");
-  // See note in ensure_parsability() below.
-  assert(SafepointSynchronize::is_at_safepoint() ||
-         !is_init_completed(),
-         "should only fill tlabs at safepoint");
-  // The main thread starts allocating via a TLAB even before it
-  // has added itself to the threads list at vm boot-up.
-  assert(Threads::first() != NULL,
-         "Attempt to fill tlabs before main thread has been added"
-         " to threads list is doomed to failure!");
-  for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
-     thread->tlab().make_parsable(retire);
-  }
-}
-
 void CollectedHeap::ensure_parsability(bool retire_tlabs) {
   // The second disjunct in the assertion below makes a concession
   // for the start-up verification done while the VM is being
@@ -343,8 +344,24 @@
          "Should only be called at a safepoint or at start-up"
          " otherwise concurrent mutator activity may make heap "
          " unparsable again");
-  if (UseTLAB) {
-    fill_all_tlabs(retire_tlabs);
+  const bool use_tlab = UseTLAB;
+  const bool deferred = _defer_initial_card_mark;
+  // The main thread starts allocating via a TLAB even before it
+  // has added itself to the threads list at vm boot-up.
+  assert(!use_tlab || Threads::first() != NULL,
+         "Attempt to fill tlabs before main thread has been added"
+         " to threads list is doomed to failure!");
+  for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
+     if (use_tlab) thread->tlab().make_parsable(retire_tlabs);
+#ifdef COMPILER2
+     // The deferred store barriers must all have been flushed to the
+     // card-table (or other remembered set structure) before GC starts
+     // processing the card-table (or other remembered set).
+     if (deferred) flush_deferred_store_barrier(thread);
+#else
+     assert(!deferred, "Should be false");
+     assert(thread->deferred_card_mark().is_empty(), "Should be empty");
+#endif
   }
 }
 
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/gc_interface/collectedHeap.hpp
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Tue Jan 19 14:23:18 2010 -0800
@@ -51,6 +51,9 @@
   // Used for filler objects (static, but initialized in ctor).
   static size_t _filler_array_max_size;
 
+  // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used
+  bool _defer_initial_card_mark;
+
  protected:
   MemRegion _reserved;
   BarrierSet* _barrier_set;
@@ -70,13 +73,16 @@
   // Constructor
   CollectedHeap();
 
+  // Do common initializations that must follow instance construction,
+  // for example, those needing virtual calls.
+  // This code could perhaps be moved into initialize() but would
+  // be slightly more awkward because we want the latter to be a
+  // pure virtual.
+  void pre_initialize();
+
   // Create a new tlab
   virtual HeapWord* allocate_new_tlab(size_t size);
 
-  // Fix up tlabs to make the heap well-formed again,
-  // optionally retiring the tlabs.
-  virtual void fill_all_tlabs(bool retire);
-
   // Accumulate statistics on all tlabs.
   virtual void accumulate_statistics_all_tlabs();
 
@@ -127,14 +133,14 @@
   static inline size_t filler_array_max_size();
 
   DEBUG_ONLY(static void fill_args_check(HeapWord* start, size_t words);)
-  DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words);)
+  DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words, bool zap = true);)
 
   // Fill with a single array; caller must ensure filler_array_min_size() <=
   // words <= filler_array_max_size().
-  static inline void fill_with_array(HeapWord* start, size_t words);
+  static inline void fill_with_array(HeapWord* start, size_t words, bool zap = true);
 
   // Fill with a single object (either an int array or a java.lang.Object).
-  static inline void fill_with_object_impl(HeapWord* start, size_t words);
+  static inline void fill_with_object_impl(HeapWord* start, size_t words, bool zap = true);
 
   // Verification functions
   virtual void check_for_bad_heap_word_value(HeapWord* addr, size_t size)
@@ -338,14 +344,14 @@
     return size_t(align_object_size(oopDesc::header_size()));
   }
 
-  static void fill_with_objects(HeapWord* start, size_t words);
+  static void fill_with_objects(HeapWord* start, size_t words, bool zap = true);
 
-  static void fill_with_object(HeapWord* start, size_t words);
-  static void fill_with_object(MemRegion region) {
-    fill_with_object(region.start(), region.word_size());
+  static void fill_with_object(HeapWord* start, size_t words, bool zap = true);
+  static void fill_with_object(MemRegion region, bool zap = true) {
+    fill_with_object(region.start(), region.word_size(), zap);
   }
-  static void fill_with_object(HeapWord* start, HeapWord* end) {
-    fill_with_object(start, pointer_delta(end, start));
+  static void fill_with_object(HeapWord* start, HeapWord* end, bool zap = true) {
+    fill_with_object(start, pointer_delta(end, start), zap);
   }
 
   // Some heaps may offer a contiguous region for shared non-blocking
@@ -431,14 +437,25 @@
   // promises to call this function on such a slow-path-allocated
   // object before performing initializations that have elided
   // store barriers. Returns new_obj, or maybe a safer copy thereof.
-  virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+  virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj);
 
   // Answers whether an initializing store to a new object currently
-  // allocated at the given address doesn't need a (deferred) store
+  // allocated at the given address doesn't need a store
   // barrier. Returns "true" if it doesn't need an initializing
   // store barrier; answers "false" if it does.
   virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
 
+  // If a compiler is eliding store barriers for TLAB-allocated objects,
+  // we will be informed of a slow-path allocation by a call
+  // to new_store_pre_barrier() above. Such a call precedes the
+  // initialization of the object itself, and no post-store-barriers will
+  // be issued. Some heap types require that the barrier strictly follows
+  // the initializing stores. (This is currently implemented by deferring the
+  // barrier until the next slow-path allocation or gc-related safepoint.)
+  // This interface answers whether a particular heap type needs the card
+  // mark to be thus strictly sequenced after the stores.
+  virtual bool card_mark_must_follow_store() const = 0;
+
   // If the CollectedHeap was asked to defer a store barrier above,
   // this informs it to flush such a deferred store barrier to the
   // remembered set.
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/memory/genCollectedHeap.cpp
--- a/src/share/vm/memory/genCollectedHeap.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint GenCollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   int i;
   _n_gens = gen_policy()->number_of_generations();
 
@@ -129,6 +131,7 @@
 
   _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions);
   set_barrier_set(rem_set()->bs());
+
   _gch = this;
 
   for (i = 0; i < _n_gens; i++) {
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/memory/genCollectedHeap.hpp
--- a/src/share/vm/memory/genCollectedHeap.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Tue Jan 19 14:23:18 2010 -0800
@@ -260,6 +260,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return UseConcMarkSweepGC;
+  }
+
   // We don't need barriers for stores to objects in the
   // young gen and, a fortiori, for initializing stores to
   // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/memory/threadLocalAllocBuffer.cpp
--- a/src/share/vm/memory/threadLocalAllocBuffer.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -100,7 +100,7 @@
 void ThreadLocalAllocBuffer::make_parsable(bool retire) {
   if (end() != NULL) {
     invariants();
-    CollectedHeap::fill_with_object(top(), hard_end());
+    CollectedHeap::fill_with_object(top(), hard_end(), retire);
 
     if (retire || ZeroTLAB) {  // "Reset" the TLAB
       set_start(NULL);
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/memory/threadLocalAllocBuffer.inline.hpp
--- a/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp	Tue Jan 19 14:23:18 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,8 +27,13 @@
   HeapWord* obj = top();
   if (pointer_delta(end(), obj) >= size) {
     // successful thread-local allocation
-
-    DEBUG_ONLY(Copy::fill_to_words(obj, size, badHeapWordVal));
+#ifdef ASSERT
+    // Skip mangling the space corresponding to the object header to
+    // ensure that the returned space is not considered parsable by
+    // any concurrent GC thread.
+    size_t hdr_size = CollectedHeap::min_fill_size();
+    Copy::fill_to_words(obj + hdr_size, size - hdr_size, badHeapWordVal);
+#endif // ASSERT
     // This addition is safe because we know that top is
     // at least size below end, so the add can't wrap.
     set_top(obj + size);
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/opto/graphKit.cpp
--- a/src/share/vm/opto/graphKit.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/opto/graphKit.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -3259,9 +3259,10 @@
   if (use_ReduceInitialCardMarks()
       && obj == just_allocated_object(control())) {
     // We can skip marks on a freshly-allocated object in Eden.
-    // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
-    // That routine informs GC to take appropriate compensating steps
-    // so as to make this card-mark elision safe.
+    // Keep this code in sync with new_store_pre_barrier() in runtime.cpp.
+    // That routine informs GC to take appropriate compensating steps,
+    // upon a slow-path allocation, so as to make this card-mark
+    // elision safe.
     return;
   }
 
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/opto/runtime.cpp
--- a/src/share/vm/opto/runtime.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/opto/runtime.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -143,7 +143,7 @@
 // We failed the fast-path allocation.  Now we need to do a scavenge or GC
 // and try allocation again.
 
-void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
+void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
   // we inform the GC that we will be doing initializing writes to
   // this object in the future without emitting card-marks, so
@@ -156,7 +156,7 @@
   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
   // GC may decide to give back a safer copy of new_obj.
-  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
+  new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }
 
@@ -200,7 +200,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
@@ -239,7 +239,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/opto/runtime.hpp
--- a/src/share/vm/opto/runtime.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/opto/runtime.hpp	Tue Jan 19 14:23:18 2010 -0800
@@ -133,8 +133,9 @@
   // Allocate storage for a objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
-  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
-  static void maybe_defer_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation, pre-initializing-stores step for
+  // implementing ReduceInitialCardMarks
+  static void new_store_pre_barrier(JavaThread* thread);
 
   // Allocate storage for a multi-dimensional arrays
   // Note: needs to be fixed for arbitrary number of dimensions
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/runtime/globals.hpp	Tue Jan 19 14:23:18 2010 -0800
@@ -2012,6 +2012,10 @@
   diagnostic(bool, GCParallelVerificationEnabled, true,                     \
           "Enable parallel memory system verification")                     \
                                                                             \
+  diagnostic(bool, DeferInitialCardMark, false,                             \
+          "When +ReduceInitialCardMarks, explicitly defer any that "        \
+           "may arise from new_pre_store_barrier")                          \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                      \
                                                                             \
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/runtime/thread.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -2357,9 +2357,8 @@
 };
 
 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  // Flush deferred store-barriers, if any, associated with
-  // initializing stores done by this JavaThread in the current epoch.
-  Universe::heap()->flush_deferred_store_barrier(this);
+  // Verify that the deferred card marks have been flushed.
+  assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
diff -r 3003ddd1d433 -r 5b00c9feb9ea src/share/vm/runtime/vmStructs.cpp
--- a/src/share/vm/runtime/vmStructs.cpp	Fri Jan 15 14:28:16 2010 -0800
+++ b/src/share/vm/runtime/vmStructs.cpp	Tue Jan 19 14:23:18 2010 -0800
@@ -309,6 +309,7 @@
   nonstatic_field(CollectedHeap,               _reserved,                                     MemRegion)                             \
   nonstatic_field(SharedHeap,                  _perm_gen,                                     PermGen*)                              \
   nonstatic_field(CollectedHeap,               _barrier_set,                                  BarrierSet*)                           \
+  nonstatic_field(CollectedHeap,               _defer_initial_card_mark,                      bool)                                  \
   nonstatic_field(CollectedHeap,               _is_gc_active,                                 bool)                                  \
   nonstatic_field(CompactibleSpace,            _compaction_top,                               HeapWord*)                             \
   nonstatic_field(CompactibleSpace,            _first_dead,                                   HeapWord*)                             \