changeset 590:3264b1424f72

Merge
author apangin
date Sun, 15 Feb 2009 20:09:02 -0800
parents fe3d7c11b4b7 (diff) 01ddca3f0730 (current diff)
children a53107650e8b 82e4d969e7cb
files
diffstat 58 files changed, 1063 insertions(+), 394 deletions(-)
--- a/.hgtags	Mon Feb 09 13:47:26 2009 -0800
+++ b/.hgtags	Sun Feb 15 20:09:02 2009 -0800
@@ -18,3 +18,5 @@
 f9d938ede1960d18cb7cf23c645b026519c1a678 jdk7-b41
 ad8c8ca4ab0f4c86e74c061958f44a8f4a930f2c jdk7-b42
 fc6a5ae3fef5ebacfa896dbb3ae37715e388e282 jdk7-b43
+809e899c638bd9b21836abf9d09ab2a30ff3900b jdk7-b44
+945bf754069766e76873c53102fae48abf04cf5b jdk7-b45
--- a/src/os/linux/vm/os_linux.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/os/linux/vm/os_linux.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -1432,6 +1432,10 @@
   return buf;
 }
 
+struct tm* os::localtime_pd(const time_t* clock, struct tm*  res) {
+  return localtime_r(clock, res);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // runtime exit support
 
--- a/src/os/solaris/vm/os_solaris.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/os/solaris/vm/os_solaris.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -323,6 +323,10 @@
   return (size_t)(base - bottom);
 }
 
+struct tm* os::localtime_pd(const time_t* clock, struct tm*  res) {
+  return localtime_r(clock, res);
+}
+
 // interruptible infrastructure
 
 // setup_interruptible saves the thread state before going into an
--- a/src/os/windows/vm/os_windows.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/os/windows/vm/os_windows.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -327,6 +327,14 @@
   return sz;
 }
 
+struct tm* os::localtime_pd(const time_t* clock, struct tm* res) {
+  const struct tm* time_struct_ptr = localtime(clock);
+  if (time_struct_ptr != NULL) {
+    *res = *time_struct_ptr;
+    return res;
+  }
+  return NULL;
+}
 
 LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo);
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -8508,7 +8508,7 @@
   size_t i = num;
   oop  cur = _overflow_list;
   const markOop proto = markOopDesc::prototype();
-  NOT_PRODUCT(size_t n = 0;)
+  NOT_PRODUCT(ssize_t n = 0;)
   for (oop next; i > 0 && cur != NULL; cur = next, i--) {
     next = oop(cur->mark());
     cur->set_mark(proto);   // until proven otherwise
@@ -8525,45 +8525,131 @@
   return !stack->isEmpty();
 }
 
-// Multi-threaded; use CAS to break off a prefix
+#define BUSY  (oop(0x1aff1aff))
+// (MT-safe) Get a prefix of at most "num" from the list.
+// The overflow list is chained through the mark word of
+// each object in the list. We fetch the entire list,
+// break off a prefix of the right size and return the
+// remainder. If other threads try to take objects from
+// the overflow list at that time, they will wait for
+// some time to see if data becomes available. If (and
+// only if) another thread places one or more object(s)
+// on the global list before we have returned the suffix
+// to the global list, we will walk down our local list
+// to find its end and append the global list to
+// our suffix before returning it. This suffix walk can
+// prove to be expensive (quadratic in the amount of traffic)
+// when there are many objects in the overflow list and
+// there is much producer-consumer contention on the list.
+// *NOTE*: The overflow list manipulation code here and
+// in ParNewGeneration:: are very similar in shape,
+// except that in the ParNew case we use the old (from/eden)
+// copy of the object to thread the list via its klass word.
+// Because of the common code, if you make any changes in
+// the code below, please check the ParNew version to see if
+// similar changes might be needed.
+// CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
                                                OopTaskQueue* work_q) {
-  assert(work_q->size() == 0, "That's the current policy");
+  assert(work_q->size() == 0, "First empty local work queue");
   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
   if (_overflow_list == NULL) {
     return false;
   }
   // Grab the entire list; we'll put back a suffix
-  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
-  if (prefix == NULL) {  // someone grabbed it before we did ...
-    // ... we could spin for a short while, but for now we don't
-    return false;
-  }
+  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+  Thread* tid = Thread::current();
+  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  size_t sleep_time_millis = MAX2((size_t)1, num/100);
+  // If the list is busy, we spin for a short while,
+  // sleeping between attempts to get the list.
+  for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
+    os::sleep(tid, sleep_time_millis, false);
+    if (_overflow_list == NULL) {
+      // Nothing left to take
+      return false;
+    } else if (_overflow_list != BUSY) {
+      // Try and grab the prefix
+      prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+    }
+  }
+  // If the list was found to be empty, or we spun long
+  // enough, we give up and return empty-handed. If we leave
+  // the list in the BUSY state below, it must be the case that
+  // some other thread holds the overflow list and will set it
+  // to a non-BUSY state in the future.
+  if (prefix == NULL || prefix == BUSY) {
+     // Nothing to take or waited long enough
+     if (prefix == NULL) {
+       // Write back the NULL in case we overwrote it with BUSY above
+       // and it is still the same value.
+       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+     }
+     return false;
+  }
+  assert(prefix != NULL && prefix != BUSY, "Error");
   size_t i = num;
   oop cur = prefix;
+  // Walk down the first "num" objects, unless we reach the end.
   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
-  if (cur->mark() != NULL) {
+  if (cur->mark() == NULL) {
+    // We have "num" or fewer elements in the list, so there
+    // is nothing to return to the global list.
+    // Write back the NULL in lieu of the BUSY we wrote
+    // above, if it is still the same value.
+    if (_overflow_list == BUSY) {
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+    }
+  } else {
+    // Chop off the suffix and return it to the global list.
+    assert(cur->mark() != BUSY, "Error");
     oop suffix_head = cur->mark(); // suffix will be put back on global list
     cur->set_mark(NULL);           // break off suffix
-    // Find tail of suffix so we can prepend suffix to global list
-    for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
-    oop suffix_tail = cur;
-    assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
-           "Tautology");
+    // It's possible that the list is still in the empty(busy) state
+    // we left it in a short while ago; in that case we may be
+    // able to place back the suffix without incurring the cost
+    // of a walk down the list.
     oop observed_overflow_list = _overflow_list;
-    do {
-      cur = observed_overflow_list;
-      suffix_tail->set_mark(markOop(cur));
+    oop cur_overflow_list = observed_overflow_list;
+    bool attached = false;
+    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
       observed_overflow_list =
-        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur);
-    } while (cur != observed_overflow_list);
+        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+      if (cur_overflow_list == observed_overflow_list) {
+        attached = true;
+        break;
+      } else cur_overflow_list = observed_overflow_list;
+    }
+    if (!attached) {
+      // Too bad, someone else sneaked in (at least) an element; we'll need
+      // to do a splice. Find tail of suffix so we can prepend suffix to global
+      // list.
+      for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
+      oop suffix_tail = cur;
+      assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
+             "Tautology");
+      observed_overflow_list = _overflow_list;
+      do {
+        cur_overflow_list = observed_overflow_list;
+        if (cur_overflow_list != BUSY) {
+          // Do the splice ...
+          suffix_tail->set_mark(markOop(cur_overflow_list));
+        } else { // cur_overflow_list == BUSY
+          suffix_tail->set_mark(NULL);
+        }
+        // ... and try to place spliced list back on overflow_list ...
+        observed_overflow_list =
+          (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+      } while (cur_overflow_list != observed_overflow_list);
+      // ... until we have succeeded in doing so.
+    }
   }
 
   // Push the prefix elements on work_q
   assert(prefix != NULL, "control point invariant");
   const markOop proto = markOopDesc::prototype();
   oop next;
-  NOT_PRODUCT(size_t n = 0;)
+  NOT_PRODUCT(ssize_t n = 0;)
   for (cur = prefix; cur != NULL; cur = next) {
     next = oop(cur->mark());
     cur->set_mark(proto);   // until proven otherwise
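
The hunk above replaces the old grab-or-give-up CAS in par_take_from_overflow_list() with the BUSY sentinel protocol described in its leading comment (the matching change to push_on_overflow_list() follows below): a taker swaps BUSY onto the global head, carves off its prefix privately, and only then publishes the remainder, while pushers that observe BUSY start a fresh list instead of chaining onto the sentinel. The following is a simplified, self-contained sketch of that protocol on a plain intrusive list; it is illustrative only (std::atomic rather than Atomic::, no mark-word threading, and it gives up rather than sleeping when it sees BUSY).

#include <atomic>
#include <cstddef>

struct Node { Node* next; };

static Node* const BUSY = reinterpret_cast<Node*>(0x1);
static std::atomic<Node*> overflow_list(nullptr);

// Push one node. If a taker has left BUSY on the head, start a fresh list
// rather than chaining onto the sentinel (mirrors push_on_overflow_list()).
void push(Node* p) {
  Node* cur = overflow_list.load();
  do {
    p->next = (cur == BUSY) ? nullptr : cur;   // never point at the sentinel
  } while (!overflow_list.compare_exchange_weak(cur, p));
}

// Take at most num nodes. The real code sleeps and retries briefly when it
// sees BUSY; this sketch simply gives up in that case.
Node* take_prefix(std::size_t num) {
  Node* head = overflow_list.exchange(BUSY);
  if (head == nullptr || head == BUSY) {
    if (head == nullptr) {                     // we overwrote NULL with BUSY: undo it
      Node* expected = BUSY;
      overflow_list.compare_exchange_strong(expected, nullptr);
    }
    return nullptr;                            // list is empty or another taker owns it
  }
  Node* cur = head;
  for (std::size_t i = 1; i < num && cur->next != nullptr; ++i) cur = cur->next;
  Node* suffix = cur->next;
  cur->next = nullptr;                         // break off our prefix
  if (suffix == nullptr) {                     // nothing to give back: clear the sentinel
    Node* expected = BUSY;
    overflow_list.compare_exchange_strong(expected, nullptr);
    return head;
  }
  Node* tail = suffix;                         // splice the suffix back onto the global list
  while (tail->next != nullptr) tail = tail->next;
  Node* observed = overflow_list.load();
  do {
    tail->next = (observed == BUSY) ? nullptr : observed;
  } while (!overflow_list.compare_exchange_weak(observed, suffix));
  return head;
}
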
@@ -8597,11 +8683,16 @@
   oop cur_overflow_list;
   do {
     cur_overflow_list = observed_overflow_list;
-    p->set_mark(markOop(cur_overflow_list));
+    if (cur_overflow_list != BUSY) {
+      p->set_mark(markOop(cur_overflow_list));
+    } else {
+      p->set_mark(NULL);
+    }
     observed_overflow_list =
       (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
   } while (cur_overflow_list != observed_overflow_list);
 }
+#undef BUSY
 
 // Single threaded
 // General Note on GrowableArray: pushes may silently fail
@@ -8610,7 +8701,7 @@
 // a lot of code in the JVM. The prudent thing for GrowableArray
 // to do (for now) is to exit with an error. However, that may
 // be too draconian in some cases because the caller may be
-// able to recover without much harm. For suych cases, we
+// able to recover without much harm. For such cases, we
 // should probably introduce a "soft_push" method which returns
 // an indication of success or failure with the assumption that
 // the caller may be able to recover from a failure; code in
@@ -8618,8 +8709,6 @@
 // failures where possible, thus, incrementally hardening the VM
 // in such low resource situations.
 void CMSCollector::preserve_mark_work(oop p, markOop m) {
-  int PreserveMarkStackSize = 128;
-
   if (_preserved_oop_stack == NULL) {
     assert(_preserved_mark_stack == NULL,
            "bijection with preserved_oop_stack");
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -595,7 +595,7 @@
   size_t        _ser_kac_preclean_ovflw;
   size_t        _ser_kac_ovflw;
   size_t        _par_kac_ovflw;
-  NOT_PRODUCT(size_t _num_par_pushes;)
+  NOT_PRODUCT(ssize_t _num_par_pushes;)
 
   // ("Weak") Reference processing support
   ReferenceProcessor*            _ref_processor;
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -24,7 +24,7 @@
 
 // We need to sort heap regions by collection desirability.
 
-class CSetChooserCache {
+class CSetChooserCache VALUE_OBJ_CLASS_SPEC {
 private:
   enum {
     CacheLength = 16
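
This and several later headers give classes an explicit allocation base: CHeapObj for objects created with new on the C heap, and VALUE_OBJ_CLASS_SPEC for objects that are only ever embedded in other objects or placed on the stack. The sketch below illustrates the general pattern with simplified stand-ins; the real macros live in HotSpot's allocation.hpp and differ in detail.

#include <cstddef>
#include <cstdlib>

// Simplified stand-ins for the two allocation bases (illustrative only).
class CHeapObjSketch {                    // intended to be new'ed on the C heap
 public:
  void* operator new(std::size_t size) { return std::malloc(size); }
  void  operator delete(void* p)       { std::free(p); }
};

class ValueObjSketch {                    // embedded or stack value: no heap allocation
 private:
  void* operator new(std::size_t);        // declared but never defined (pre-C++11 "= delete")
  void  operator delete(void*);
};

class ConcurrentG1RefineLike : public CHeapObjSketch { int dummy; };
class CSetChooserCacheLike   : public ValueObjSketch { int dummy; };

int main() {
  ConcurrentG1RefineLike* r = new ConcurrentG1RefineLike();  // fine: C-heap object
  delete r;
  CSetChooserCacheLike cache;                                // fine: embedded/stack value
  // new CSetChooserCacheLike();  // would not compile: operator new is private
  (void)cache;
  return 0;
}
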
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -33,7 +33,7 @@
   PYA_cancel     // It's been completed by somebody else: cancel.
 };
 
-class ConcurrentG1Refine {
+class ConcurrentG1Refine: public CHeapObj {
   ConcurrentG1RefineThread* _cg1rThread;
 
   volatile jint _pya;
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -30,7 +30,7 @@
 // A generic CM bit map.  This is essentially a wrapper around the BitMap
 // class, with one bit per (1<<_shifter) HeapWords.
 
-class CMBitMapRO {
+class CMBitMapRO VALUE_OBJ_CLASS_SPEC {
  protected:
   HeapWord* _bmStartWord;      // base address of range covered by map
   size_t    _bmWordSize;       // map size (in #HeapWords covered)
@@ -139,7 +139,7 @@
 
 // Represents a marking stack used by the CM collector.
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
-class CMMarkStack {
+class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
   oop*   _base;      // bottom of stack
   jint   _index;     // one more than last occupied index
@@ -237,7 +237,7 @@
   void oops_do(OopClosure* f);
 };
 
-class CMRegionStack {
+class CMRegionStack VALUE_OBJ_CLASS_SPEC {
   MemRegion* _base;
   jint _capacity;
   jint _index;
@@ -312,7 +312,7 @@
 
 class ConcurrentMarkThread;
 
-class ConcurrentMark {
+class ConcurrentMark: public CHeapObj {
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -141,7 +141,7 @@
     _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL),
     _length(0), _scan_only_length(0),
     _last_sampled_rs_lengths(0),
-    _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0)
+    _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0)
 {
   guarantee( check_list_empty(false), "just making sure..." );
 }
@@ -159,16 +159,15 @@
 }
 
 void YoungList::add_survivor_region(HeapRegion* hr) {
-  assert(!hr->is_survivor(), "should not already be for survived");
+  assert(hr->is_survivor(), "should be flagged as survivor region");
   assert(hr->get_next_young_region() == NULL, "cause it should!");
 
   hr->set_next_young_region(_survivor_head);
   if (_survivor_head == NULL) {
-    _survivors_tail = hr;
+    _survivor_tail = hr;
   }
   _survivor_head = hr;
 
-  hr->set_survivor();
   ++_survivor_length;
 }
 
@@ -239,7 +238,7 @@
 
   empty_list(_survivor_head);
   _survivor_head = NULL;
-  _survivors_tail = NULL;
+  _survivor_tail = NULL;
   _survivor_length = 0;
 
   _last_sampled_rs_lengths = 0;
@@ -391,6 +390,7 @@
 
   // Add survivor regions to SurvRateGroup.
   _g1h->g1_policy()->note_start_adding_survivor_regions();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */);
   for (HeapRegion* curr = _survivor_head;
        curr != NULL;
        curr = curr->get_next_young_region()) {
@@ -401,7 +401,7 @@
   if (_survivor_head != NULL) {
     _head           = _survivor_head;
     _length         = _survivor_length + _scan_only_length;
-    _survivors_tail->set_next_young_region(_scan_only_head);
+    _survivor_tail->set_next_young_region(_scan_only_head);
   } else {
     _head           = _scan_only_head;
     _length         = _scan_only_length;
@@ -418,9 +418,9 @@
   _curr_scan_only   = NULL;
 
   _survivor_head    = NULL;
-  _survivors_tail   = NULL;
+  _survivor_tail   = NULL;
   _survivor_length  = 0;
-  _g1h->g1_policy()->finished_recalculating_age_indexes();
+  _g1h->g1_policy()->finished_recalculating_age_indexes(false /* is_survivors */);
 
   assert(check_list_well_formed(), "young list should be well formed");
 }
@@ -553,7 +553,7 @@
   if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) {
     alloc_region = newAllocRegion_work(word_size, true, zero_filled);
     if (purpose == GCAllocForSurvived && alloc_region != NULL) {
-      _young_list->add_survivor_region(alloc_region);
+      alloc_region->set_survivor();
     }
     ++_gc_alloc_region_counts[purpose];
   } else {
@@ -949,6 +949,10 @@
     GCOverheadReporter::recordSTWEnd(end);
     g1_policy()->record_full_collection_end();
 
+#ifdef TRACESPINNING
+    ParallelTaskTerminator::print_termination_counts();
+#endif
+
     gc_epilogue(true);
 
     // Abandon concurrent refinement.  This must happen last: in the
@@ -2593,6 +2597,9 @@
         _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
 
+        g1_policy()->record_survivor_regions(_young_list->survivor_length(),
+                                             _young_list->first_survivor_region(),
+                                             _young_list->last_survivor_region());
         _young_list->reset_auxilary_lists();
       }
     } else {
@@ -2619,7 +2626,9 @@
 #endif // SCAN_ONLY_VERBOSE
 
     double end_time_sec = os::elapsedTime();
-    g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    if (!evacuation_failed()) {
+      g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0);
+    }
     GCOverheadReporter::recordSTWEnd(end_time_sec);
     g1_policy()->record_collection_pause_end(popular_region != NULL,
                                              abandoned);
@@ -2642,8 +2651,13 @@
       }
     }
 
-    if (mark_in_progress())
+    if (mark_in_progress()) {
       concurrent_mark()->update_g1_committed();
+    }
+
+#ifdef TRACESPINNING
+    ParallelTaskTerminator::print_termination_counts();
+#endif
 
     gc_epilogue(false);
   }
@@ -2754,6 +2768,13 @@
     _gc_alloc_region_list = r->next_gc_alloc_region();
     r->set_next_gc_alloc_region(NULL);
     r->set_is_gc_alloc_region(false);
+    if (r->is_survivor()) {
+      if (r->is_empty()) {
+        r->set_not_young();
+      } else {
+        _young_list->add_survivor_region(r);
+      }
+    }
     if (r->is_empty()) {
       ++_free_regions;
     }
@@ -3150,6 +3171,20 @@
   return block;
 }
 
+void G1CollectedHeap::retire_alloc_region(HeapRegion* alloc_region,
+                                            bool par) {
+  // Another thread might have obtained alloc_region for the given
+  // purpose, and might be attempting to allocate in it, and might
+  // succeed.  Therefore, we can't do the "finalization" stuff on the
+  // region below until we're sure the last allocation has happened.
+  // We ensure this by allocating the remaining space with a garbage
+  // object.
+  if (par) par_allocate_remaining_space(alloc_region);
+  // Now we can do the post-GC stuff on the region.
+  alloc_region->note_end_of_copying();
+  g1_policy()->record_after_bytes(alloc_region->used());
+}
+
 HeapWord*
 G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose,
                                          HeapRegion*    alloc_region,
@@ -3167,16 +3202,7 @@
     // Otherwise, continue; this new region is empty, too.
   }
   assert(alloc_region != NULL, "We better have an allocation region");
-  // Another thread might have obtained alloc_region for the given
-  // purpose, and might be attempting to allocate in it, and might
-  // succeed.  Therefore, we can't do the "finalization" stuff on the
-  // region below until we're sure the last allocation has happened.
-  // We ensure this by allocating the remaining space with a garbage
-  // object.
-  if (par) par_allocate_remaining_space(alloc_region);
-  // Now we can do the post-GC stuff on the region.
-  alloc_region->note_end_of_copying();
-  g1_policy()->record_after_bytes(alloc_region->used());
+  retire_alloc_region(alloc_region, par);
 
   if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) {
     // Cannot allocate more regions for the given purpose.
@@ -3185,7 +3211,7 @@
     if (purpose != alt_purpose) {
       HeapRegion* alt_region = _gc_alloc_regions[alt_purpose];
       // Has not the alternative region been aliased?
-      if (alloc_region != alt_region) {
+      if (alloc_region != alt_region && alt_region != NULL) {
         // Try to allocate in the alternative region.
         if (par) {
           block = alt_region->par_allocate(word_size);
@@ -3194,9 +3220,10 @@
         }
         // Make an alias.
         _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose];
-      }
-      if (block != NULL) {
-        return block;
+        if (block != NULL) {
+          return block;
+        }
+        retire_alloc_region(alt_region, par);
       }
       // Both the allocation region and the alternative one are full
       // and aliased, replace them with a new allocation region.
@@ -3497,6 +3524,7 @@
   OverflowQueue* _overflowed_refs;
 
   G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+  ageTable           _age_table;
 
   size_t           _alloc_buffer_waste;
   size_t           _undo_waste;
@@ -3538,6 +3566,7 @@
       _refs(g1h->task_queue(queue_num)),
       _hash_seed(17), _queue_num(queue_num),
       _term_attempts(0),
+      _age_table(false),
 #if G1_DETAILED_STATS
       _pushes(0), _pops(0), _steals(0),
       _steal_attempts(0),  _overflow_pushes(0),
@@ -3572,8 +3601,9 @@
 
   RefToScanQueue*   refs()            { return _refs;             }
   OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
-
-  inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+  ageTable*         age_table()       { return &_age_table;       }
+
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
     return &_alloc_buffers[purpose];
   }
 
@@ -3834,7 +3864,9 @@
           (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
-  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(),
+  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
+                                           : m->age();
+  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
                                                              word_sz);
   HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
   oop       obj     = oop(obj_ptr);
@@ -3872,9 +3904,12 @@
         obj->incr_age();
       } else {
         m = m->incr_age();
+        obj->set_mark(m);
       }
+      _par_scan_state->age_table()->add(obj, word_sz);
+    } else {
+      obj->set_mark(m);
     }
-    obj->set_mark(m);
 
     // preserve "next" mark bit
     if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
@@ -4129,6 +4164,9 @@
       _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms);
       _g1h->g1_policy()->record_termination_time(i, term_ms);
     }
+    if (G1UseSurvivorSpace) {
+      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
+    }
     _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
 
     // Clean up any par-expanded rem sets.
@@ -4368,7 +4406,7 @@
   // Is this the right thing to do here?  We don't save marks
   // on individual heap regions when we allocate from
   // them in parallel, so this seems like the correct place for this.
-  all_alloc_regions_note_end_of_copying();
+  retire_all_alloc_regions();
   {
     G1IsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
@@ -5008,7 +5046,7 @@
   return no_allocs;
 }
 
-void G1CollectedHeap::all_alloc_regions_note_end_of_copying() {
+void G1CollectedHeap::retire_all_alloc_regions() {
   for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
     HeapRegion* r = _gc_alloc_regions[ap];
     if (r != NULL) {
@@ -5021,8 +5059,7 @@
         }
       }
       if (!has_processed_alias) {
-        r->note_end_of_copying();
-        g1_policy()->record_after_bytes(r->used());
+        retire_alloc_region(r, false /* par */);
       }
     }
   }
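
One correctness detail in the g1CollectedHeap.cpp copy path above: the object's age is now read via displaced_mark_helper() when the mark word is displaced (for example, when the object is locked), instead of taking age bits from a pointer-valued mark. The sketch below isolates that selection logic with a simplified stand-in type; it is not the HotSpot markOop API.

#include <cstddef>

// Simplified stand-in for a mark word whose age bits may live elsewhere when
// the header is displaced. Illustrative only; not the HotSpot markOop layout.
struct MarkSketch {
  bool        has_displaced;   // header saved elsewhere?
  MarkSketch* displaced;       // where the real header lives if so
  int         age_bits;

  int age() const { return age_bits; }
  MarkSketch* displaced_mark_helper() const { return displaced; }
  bool has_displaced_mark_helper() const { return has_displaced; }
};

// The age selection added above: read the age from the displaced header when
// there is one, otherwise from the mark itself.
int object_age(const MarkSketch* m) {
  return m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
                                        : m->age();
}

int main() {
  MarkSketch saved  = { false, nullptr, 5 };
  MarkSketch locked = { true, &saved, 0 };    // age bits here are not valid
  return object_age(&locked) == 5 ? 0 : 1;    // age comes from the saved header
}
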
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -90,7 +90,7 @@
   HeapRegion* _curr_scan_only;
 
   HeapRegion* _survivor_head;
-  HeapRegion* _survivors_tail;
+  HeapRegion* _survivor_tail;
   size_t      _survivor_length;
 
   void          empty_list(HeapRegion* list);
@@ -105,6 +105,7 @@
   bool          is_empty() { return _length == 0; }
   size_t        length() { return _length; }
   size_t        scan_only_length() { return _scan_only_length; }
+  size_t        survivor_length() { return _survivor_length; }
 
   void rs_length_sampling_init();
   bool rs_length_sampling_more();
@@ -120,6 +121,7 @@
   HeapRegion* first_region() { return _head; }
   HeapRegion* first_scan_only_region() { return _scan_only_head; }
   HeapRegion* first_survivor_region() { return _survivor_head; }
+  HeapRegion* last_survivor_region() { return _survivor_tail; }
   HeapRegion* par_get_next_scan_only_region() {
     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
     HeapRegion* ret = _curr_scan_only;
@@ -219,7 +221,7 @@
   // The to-space memory regions into which objects are being copied during
   // a GC.
   HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
-  uint _gc_alloc_region_counts[GCAllocPurposeCount];
+  size_t _gc_alloc_region_counts[GCAllocPurposeCount];
 
   // A list of the regions that have been set to be alloc regions in the
   // current collection.
@@ -281,8 +283,8 @@
   // Returns "true" iff none of the gc alloc regions have any allocations
   // since the last call to "save_marks".
   bool all_alloc_regions_no_allocs_since_save_marks();
-  // Calls "note_end_of_copying on all gc alloc_regions.
-  void all_alloc_regions_note_end_of_copying();
+  // Perform finalization stuff on all allocation regions.
+  void retire_all_alloc_regions();
 
   // The number of regions allocated to hold humongous objects.
   int         _num_humongous_regions;
@@ -351,6 +353,10 @@
   // that parallel threads might be attempting allocations.
   void par_allocate_remaining_space(HeapRegion* r);
 
+  // Retires an allocation region when it is full or at the end of a
+  // GC pause.
+  void  retire_alloc_region(HeapRegion* alloc_region, bool par);
+
   // Helper function for two callbacks below.
   // "full", if true, indicates that the GC is for a System.gc() request,
   // and should collect the entire heap.  If "clear_all_soft_refs" is true,
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -196,8 +196,13 @@
   _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived",
                                                  G1YoungSurvRateNumRegionsSummary)),
   _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor",
-                                              G1YoungSurvRateNumRegionsSummary))
+                                              G1YoungSurvRateNumRegionsSummary)),
   // add here any more surv rate groups
+  _recorded_survivor_regions(0),
+  _recorded_survivor_head(NULL),
+  _recorded_survivor_tail(NULL),
+  _survivors_age_table(true)
+
 {
   _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
   _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
@@ -272,6 +277,15 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
   _tenuring_threshold = MaxTenuringThreshold;
 
+  if (G1UseSurvivorSpace) {
+    // If G1FixedSurvivorSpaceSize is 0, the size is not fixed and
+    // _max_survivor_regions will be recalculated by
+    // calculate_young_list_target_config() during initialization.
+    _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
+  } else {
+    _max_survivor_regions = 0;
+  }
+
   initialize_all();
 }
 
@@ -283,6 +297,9 @@
 void G1CollectorPolicy::initialize_flags() {
   set_min_alignment(HeapRegion::GrainBytes);
   set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name()));
+  if (SurvivorRatio < 1) {
+    vm_exit_during_initialization("Invalid survivor ratio specified");
+  }
   CollectorPolicy::initialize_flags();
 }
 
@@ -301,6 +318,8 @@
                                   "-XX:+UseConcMarkSweepGC.");
   }
 
+  initialize_gc_policy_counters();
+
   if (G1Gen) {
     _in_young_gc_mode = true;
 
@@ -322,6 +341,12 @@
   }
 }
 
+// Create the jstat counters for the policy.
+void G1CollectorPolicy::initialize_gc_policy_counters()
+{
+  _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 2 + G1Gen);
+}
+
 void G1CollectorPolicy::calculate_young_list_min_length() {
   _young_list_min_length = 0;
 
@@ -352,6 +377,7 @@
     guarantee( so_length < _young_list_target_length, "invariant" );
     _young_list_so_prefix_length = so_length;
   }
+  calculate_survivors_policy();
 }
 
 // This method calculate the optimal scan-only set for a fixed young
@@ -448,6 +474,9 @@
   if (full_young_gcs() && _free_regions_at_end_of_collection > 0) {
     // we are in fully-young mode and there are free regions in the heap
 
+    double survivor_regions_evac_time =
+        predict_survivor_regions_evac_time();
+
     size_t min_so_length = 0;
     size_t max_so_length = 0;
 
@@ -497,9 +526,8 @@
       scanned_cards = predict_non_young_card_num(adj_rs_lengths);
     // calculate this once, so that we don't have to recalculate it in
     // the innermost loop
-    double base_time_ms = predict_base_elapsed_time_ms(pending_cards,
-                                                       scanned_cards);
-
+    double base_time_ms = predict_base_elapsed_time_ms(pending_cards, scanned_cards)
+                          + survivor_regions_evac_time;
     // the result
     size_t final_young_length = 0;
     size_t final_so_length = 0;
@@ -548,14 +576,14 @@
     bool done = false;
     // this is the outermost loop
     while (!done) {
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
       // leave this in for debugging, just in case
       gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT
                              ", incr " SIZE_FORMAT ", pass %s",
                              from_so_length, to_so_length, so_length_incr,
                              (pass == pass_type_coarse) ? "coarse" :
                              (pass == pass_type_fine) ? "fine" : "final");
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
       size_t so_length = from_so_length;
       size_t init_free_regions =
@@ -651,11 +679,11 @@
           guarantee( so_length_incr == so_coarse_increments, "invariant" );
           guarantee( final_so_length >= min_so_length, "invariant" );
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
           // leave this in for debugging, just in case
           gclog_or_tty->print_cr("  coarse pass: SO length " SIZE_FORMAT,
                                  final_so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
           from_so_length =
             (final_so_length - min_so_length > so_coarse_increments) ?
@@ -687,12 +715,12 @@
             // of the optimal
             size_t new_so_length = 950 * final_so_length / 1000;
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
             // leave this in for debugging, just in case
             gclog_or_tty->print_cr("  fine pass: SO length " SIZE_FORMAT
                                    ", setting it to " SIZE_FORMAT,
                                     final_so_length, new_so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
             from_so_length = new_so_length;
             to_so_length = new_so_length;
@@ -719,7 +747,8 @@
     }
 
     // we should have at least one region in the target young length
-    _young_list_target_length = MAX2((size_t) 1, final_young_length);
+    _young_list_target_length =
+        MAX2((size_t) 1, final_young_length + _recorded_survivor_regions);
     if (final_so_length >= final_young_length)
       // and we need to ensure that the S-O length is not greater than
       // the target young length (this is being a bit careful)
@@ -734,7 +763,7 @@
     double end_time_sec = os::elapsedTime();
     double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0;
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
     // leave this in for debugging, just in case
     gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT
                            ", SO = " SIZE_FORMAT ", "
@@ -747,9 +776,9 @@
                            calculations,
                            full_young_gcs() ? "full" : "partial",
                            should_initiate_conc_mark() ? " i-m" : "",
-                           in_marking_window(),
-                           in_marking_window_im());
-#endif // 0
+                           _in_marking_window,
+                           _in_marking_window_im);
+#endif // TRACE_CALC_YOUNG_CONFIG
 
     if (_young_list_target_length < _young_list_min_length) {
       // bummer; this means that, if we do a pause when the optimal
@@ -768,14 +797,14 @@
         // S-O length
         so_length = calculate_optimal_so_length(_young_list_min_length);
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
       // leave this in for debugging, just in case
       gclog_or_tty->print_cr("adjusted target length from "
                              SIZE_FORMAT " to " SIZE_FORMAT
                              ", SO " SIZE_FORMAT,
                              _young_list_target_length, _young_list_min_length,
                              so_length);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
       _young_list_target_length =
         MAX2(_young_list_min_length, (size_t)1);
@@ -785,12 +814,12 @@
     // we are in a partially-young mode or we've run out of regions (due
     // to evacuation failure)
 
-#if 0
+#ifdef TRACE_CALC_YOUNG_CONFIG
     // leave this in for debugging, just in case
     gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT
                            ", SO " SIZE_FORMAT,
                            _young_list_min_length, 0);
-#endif // 0
+#endif // TRACE_CALC_YOUNG_CONFIG
 
     // we'll do the pause as soon as possible and with no S-O prefix
     // (see above for the reasons behind the latter)
@@ -884,6 +913,16 @@
   return true;
 }
 
+double G1CollectorPolicy::predict_survivor_regions_evac_time() {
+  double survivor_regions_evac_time = 0.0;
+  for (HeapRegion * r = _recorded_survivor_head;
+       r != NULL && r != _recorded_survivor_tail->get_next_young_region();
+       r = r->get_next_young_region()) {
+    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true);
+  }
+  return survivor_regions_evac_time;
+}
+
 void G1CollectorPolicy::check_prediction_validity() {
   guarantee( adaptive_young_list_length(), "should not call this otherwise" );
 
@@ -995,11 +1034,15 @@
   _short_lived_surv_rate_group->start_adding_regions();
   // also call this on any additional surv rate groups
 
+  record_survivor_regions(0, NULL, NULL);
+
   _prev_region_num_young   = _region_num_young;
   _prev_region_num_tenured = _region_num_tenured;
 
   _free_regions_at_end_of_collection = _g1->free_regions();
   _scan_only_regions_at_end_of_collection = 0;
+  // Reset survivors SurvRateGroup.
+  _survivor_surv_rate_group->reset();
   calculate_young_list_min_length();
   calculate_young_list_target_config();
  }
@@ -1104,6 +1147,10 @@
   _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length);
   tag_scan_only(short_lived_so_length);
 
+  if (G1UseSurvivorSpace) {
+    _survivors_age_table.clear();
+  }
+
   assert( verify_young_ages(), "region age verification" );
 }
 
@@ -1965,9 +2012,6 @@
   // </NEW PREDICTION>
 
   _target_pause_time_ms = -1.0;
-
-  // TODO: calculate tenuring threshold
-  _tenuring_threshold = MaxTenuringThreshold;
 }
 
 // <NEW PREDICTION>
@@ -2058,7 +2102,7 @@
     guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1,
                "invariant" );
     int age = hr->age_in_surv_rate_group();
-    double yg_surv_rate = predict_yg_surv_rate(age);
+    double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group());
     bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate);
   }
 
@@ -2091,7 +2135,7 @@
   }
 #if PREDICTIONS_VERBOSE
   if (young) {
-    _recorded_young_bytes += hr->asSpace()->used();
+    _recorded_young_bytes += hr->used();
   } else {
     _recorded_marked_bytes += hr->max_live_bytes();
   }
@@ -2119,11 +2163,6 @@
       predict_non_young_card_num(_predicted_rs_lengths);
   _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions;
 
-  _predicted_young_survival_ratio = 0.0;
-  for (int i = 0; i < _recorded_young_regions; ++i)
-    _predicted_young_survival_ratio += predict_yg_surv_rate(i);
-  _predicted_young_survival_ratio /= (double) _recorded_young_regions;
-
   _predicted_scan_only_scan_time_ms =
     predict_scan_only_time_ms(_recorded_scan_only_regions);
   _predicted_rs_update_time_ms =
@@ -2673,8 +2712,11 @@
   assert(in_young_gc_mode(), "should be in young GC mode");
   bool ret;
   size_t young_list_length = _g1->young_list_length();
-
-  if (young_list_length < _young_list_target_length) {
+  size_t young_list_max_length = _young_list_target_length;
+  if (G1FixedEdenSize) {
+    young_list_max_length -= _max_survivor_regions;
+  }
+  if (young_list_length < young_list_max_length) {
     ret = true;
     ++_region_num_young;
   } else {
@@ -2710,17 +2752,39 @@
 }
 
 
-uint G1CollectorPolicy::max_regions(int purpose) {
+size_t G1CollectorPolicy::max_regions(int purpose) {
   switch (purpose) {
     case GCAllocForSurvived:
-      return G1MaxSurvivorRegions;
+      return _max_survivor_regions;
     case GCAllocForTenured:
-      return UINT_MAX;
+      return REGIONS_UNLIMITED;
     default:
-      return UINT_MAX;
+      ShouldNotReachHere();
+      return REGIONS_UNLIMITED;
   };
 }
 
+// Calculates survivor space parameters.
+void G1CollectorPolicy::calculate_survivors_policy()
+{
+  if (!G1UseSurvivorSpace) {
+    return;
+  }
+  if (G1FixedSurvivorSpaceSize == 0) {
+    _max_survivor_regions = _young_list_target_length / SurvivorRatio;
+  } else {
+    _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes;
+  }
+
+  if (G1FixedTenuringThreshold) {
+    _tenuring_threshold = MaxTenuringThreshold;
+  } else {
+    _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold(
+        HeapRegion::GrainWords * _max_survivor_regions);
+  }
+}
+
+
 void
 G1CollectorPolicy_BestRegionsFirst::
 set_single_region_collection_set(HeapRegion* hr) {
@@ -2743,7 +2807,11 @@
   double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
 
   size_t young_list_length = _g1->young_list_length();
-  bool reached_target_length = young_list_length >= _young_list_target_length;
+  size_t young_list_max_length = _young_list_target_length;
+  if (G1FixedEdenSize) {
+    young_list_max_length -= _max_survivor_regions;
+  }
+  bool reached_target_length = young_list_length >= young_list_max_length;
 
   if (in_young_gc_mode()) {
     if (reached_target_length) {
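
calculate_survivors_policy() above derives two values per pause: the survivor-region budget (G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes when a fixed size is given, otherwise the young target length divided by SurvivorRatio) and, unless G1FixedTenuringThreshold is set, a tenuring threshold computed from the merged per-thread age tables. The sketch below walks through the region-budget arithmetic with assumed example values; the numbers and the 1 MB region size are illustrative, not taken from this changeset.

#include <cstdio>
#include <cstddef>

int main() {
  // Assumed example values, not from the changeset.
  const size_t region_bytes             = 1 * 1024 * 1024;  // 1 MB G1 region
  const size_t g1_fixed_survivor_bytes  = 0;                // 0 means: use SurvivorRatio
  const size_t survivor_ratio           = 8;                // e.g. -XX:SurvivorRatio=8
  const size_t young_list_target_length = 64;               // regions

  size_t max_survivor_regions;
  if (g1_fixed_survivor_bytes == 0) {
    max_survivor_regions = young_list_target_length / survivor_ratio;   // 64 / 8 = 8
  } else {
    max_survivor_regions = g1_fixed_survivor_bytes / region_bytes;
  }

  // With G1FixedEdenSize, the survivor budget is carved out of the young
  // target, so the eden-driven pause trigger uses the reduced length.
  size_t young_list_max_length = young_list_target_length - max_survivor_regions;

  printf("max survivor regions: %zu, eden trigger length: %zu\n",
         max_survivor_regions, young_list_max_length);
  return 0;
}
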
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -49,7 +49,7 @@
 class MainBodySummary;
 class PopPreambleSummary;
 
-class PauseSummary {
+class PauseSummary: public CHeapObj {
   define_num_seq(total)
     define_num_seq(other)
 
@@ -58,7 +58,7 @@
   virtual PopPreambleSummary* pop_preamble_summary() { return NULL; }
 };
 
-class MainBodySummary {
+class MainBodySummary: public CHeapObj {
   define_num_seq(satb_drain) // optional
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
@@ -75,7 +75,7 @@
   define_num_seq(clear_ct)  // parallel only
 };
 
-class PopPreambleSummary {
+class PopPreambleSummary: public CHeapObj {
   define_num_seq(pop_preamble)
     define_num_seq(pop_update_rs)
     define_num_seq(pop_scan_rs)
@@ -557,6 +557,8 @@
     return get_new_neg_prediction(_young_gc_eff_seq);
   }
 
+  double predict_survivor_regions_evac_time();
+
   // </NEW PREDICTION>
 
 public:
@@ -599,8 +601,8 @@
 
   // Returns an estimate of the survival rate of the region at yg-age
   // "yg_age".
-  double predict_yg_surv_rate(int age) {
-    TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age);
+  double predict_yg_surv_rate(int age, SurvRateGroup* surv_rate_group) {
+    TruncatedSeq* seq = surv_rate_group->get_seq(age);
     if (seq->num() == 0)
       gclog_or_tty->print("BARF! age is %d", age);
     guarantee( seq->num() > 0, "invariant" );
@@ -610,6 +612,10 @@
     return pred;
   }
 
+  double predict_yg_surv_rate(int age) {
+    return predict_yg_surv_rate(age, _short_lived_surv_rate_group);
+  }
+
   double accum_yg_surv_rate_pred(int age) {
     return _short_lived_surv_rate_group->accum_surv_rate_pred(age);
   }
@@ -822,6 +828,9 @@
 
   virtual void init();
 
+  // Create jstat counters for the policy.
+  virtual void initialize_gc_policy_counters();
+
   virtual HeapWord* mem_allocate_work(size_t size,
                                       bool is_tlab,
                                       bool* gc_overhead_limit_was_exceeded);
@@ -1047,8 +1056,12 @@
   // Print stats on young survival ratio
   void print_yg_surv_rate_info() const;
 
-  void finished_recalculating_age_indexes() {
-    _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+  void finished_recalculating_age_indexes(bool is_survivors) {
+    if (is_survivors) {
+      _survivor_surv_rate_group->finished_recalculating_age_indexes();
+    } else {
+      _short_lived_surv_rate_group->finished_recalculating_age_indexes();
+    }
     // do that for any other surv rate groups
   }
 
@@ -1097,6 +1110,17 @@
   // maximum amount of suvivors regions.
   int _tenuring_threshold;
 
+  // The limit on the number of regions allocated for survivors.
+  size_t _max_survivor_regions;
+
+  // The number of survivor regions after a collection.
+  size_t _recorded_survivor_regions;
+  // List of survivor regions.
+  HeapRegion* _recorded_survivor_head;
+  HeapRegion* _recorded_survivor_tail;
+
+  ageTable _survivors_age_table;
+
 public:
 
   inline GCAllocPurpose
@@ -1116,7 +1140,9 @@
     return GCAllocForTenured;
   }
 
-  uint max_regions(int purpose);
+  static const size_t REGIONS_UNLIMITED = ~(size_t)0;
+
+  size_t max_regions(int purpose);
 
   // The limit on regions for a particular purpose is reached.
   void note_alloc_region_limit_reached(int purpose) {
@@ -1132,6 +1158,23 @@
   void note_stop_adding_survivor_regions() {
     _survivor_surv_rate_group->stop_adding_regions();
   }
+
+  void record_survivor_regions(size_t      regions,
+                               HeapRegion* head,
+                               HeapRegion* tail) {
+    _recorded_survivor_regions = regions;
+    _recorded_survivor_head    = head;
+    _recorded_survivor_tail    = tail;
+  }
+
+  void record_thread_age_table(ageTable* age_table)
+  {
+    _survivors_age_table.merge_par(age_table);
+  }
+
+  // Calculates survivor space parameters.
+  void calculate_survivors_policy();
+
 };
 
 // This encapsulates a particular strategy for a g1 Collector.
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -28,7 +28,7 @@
 /***** ALL TIMES ARE IN SECS!!!!!!! *****/
 
 // this is the "interface"
-class G1MMUTracker {
+class G1MMUTracker: public CHeapObj {
 protected:
   double          _time_slice;
   double          _max_gc_time; // this is per time slice
@@ -67,7 +67,7 @@
   }
 };
 
-class G1MMUTrackerQueueElem {
+class G1MMUTrackerQueueElem VALUE_OBJ_CLASS_SPEC {
 private:
   double _start_time;
   double _end_time;
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -572,6 +572,9 @@
   }
   guarantee( _cards_scanned == NULL, "invariant" );
   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
+  for (uint i = 0; i < n_workers(); ++i) {
+    _cards_scanned[i] = 0;
+  }
   _total_cards_scanned = 0;
 }
 
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -30,7 +30,7 @@
 class HRInto_G1RemSet;
 class ConcurrentG1Refine;
 
-class G1RemSet {
+class G1RemSet: public CHeapObj {
 protected:
   G1CollectedHeap* _g1;
 
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -281,7 +281,17 @@
   develop(bool, G1HRRSFlushLogBuffersOnVerify, false,                       \
           "Forces flushing of log buffers before verification.")            \
                                                                             \
-  product(intx, G1MaxSurvivorRegions, 0,                                    \
-          "The maximum number of survivor regions")
+  product(bool, G1UseSurvivorSpace, true,                                   \
+          "When true, use survivor space.")                                 \
+                                                                            \
+  product(bool, G1FixedTenuringThreshold, false,                            \
+          "When set, G1 will not adjust the tenuring threshold")            \
+                                                                            \
+  product(bool, G1FixedEdenSize, false,                                     \
+          "When set, G1 will not allocate unused survivor space regions")   \
+                                                                            \
+  product(uintx, G1FixedSurvivorSpaceSize, 0,                               \
+          "If non-0 is the size of the G1 survivor space, "                 \
+          "otherwise SurvivorRatio is used to determine the size")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -566,7 +566,11 @@
   void note_end_of_copying() {
     assert(top() >= _next_top_at_mark_start,
            "Increase only");
-    _next_top_at_mark_start = top();
+    // Survivor regions will be scanned at the start of concurrent
+    // marking.
+    if (!is_survivor()) {
+      _next_top_at_mark_start = top();
+    }
   }
 
   // Returns "false" iff no object in the region was allocated when the
@@ -829,7 +833,7 @@
 
 // A linked lists of heap regions.  It leaves the "next" field
 // unspecified; that's up to subtypes.
-class RegionList {
+class RegionList VALUE_OBJ_CLASS_SPEC {
 protected:
   virtual HeapRegion* get_next(HeapRegion* chr) = 0;
   virtual void set_next(HeapRegion* chr,
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -65,9 +65,11 @@
   // We need access in order to union things into the base table.
   BitMap* bm() { return &_bm; }
 
+#if PRT_COUNT_OCCUPIED
   void recount_occupied() {
     _occupied = (jint) bm()->count_one_bits();
   }
+#endif
 
   PerRegionTable(HeapRegion* hr) :
     _hr(hr),
@@ -1144,7 +1146,9 @@
   size_t i = _outgoing_region_map.get_next_one_offset(0);
   while (i < _outgoing_region_map.size()) {
     HeapRegion* to_region = g1h->region_at(i);
-    to_region->rem_set()->clear_incoming_entry(hr());
+    if (!to_region->in_collection_set()) {
+      to_region->rem_set()->clear_incoming_entry(hr());
+    }
     i = _outgoing_region_map.get_next_one_offset(i+1);
   }
 }
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -58,7 +58,7 @@
 //      is represented.  If a deleted PRT is re-used, a thread adding a bit,
 //      thinking the PRT is for a different region, does no harm.
 
-class OtherRegionsTable: public CHeapObj {
+class OtherRegionsTable VALUE_OBJ_CLASS_SPEC {
   friend class HeapRegionRemSetIterator;
 
   G1CollectedHeap* _g1h;
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -29,7 +29,7 @@
 
 class PtrQueueSet;
 
-class PtrQueue: public CHeapObj {
+class PtrQueue VALUE_OBJ_CLASS_SPEC {
 
 protected:
   // The ptr queue set to which this queue belongs.
@@ -130,7 +130,7 @@
 // In particular, the individual queues allocate buffers from this shared
 // set, and return completed buffers to the set.
 // All these variables are are protected by the TLOQ_CBL_mon. XXX ???
-class PtrQueueSet: public CHeapObj {
+class PtrQueueSet VALUE_OBJ_CLASS_SPEC {
 
 protected:
 
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -33,7 +33,7 @@
 // old versions synchronously.
 
 
-class SparsePRTEntry {
+class SparsePRTEntry: public CHeapObj {
 public:
   enum SomePublicConstants {
     CardsPerEntry = (short)4,
@@ -167,7 +167,7 @@
 };
 
   // ValueObj because will be embedded in HRRS iterator.
-class RSHashTableIter: public CHeapObj {
+class RSHashTableIter VALUE_OBJ_CLASS_SPEC {
     short _tbl_ind;
     short _bl_ind;
     short _card_ind;
@@ -213,7 +213,7 @@
 
 class SparsePRTIter;
 
-class SparsePRT : public CHeapObj {
+class SparsePRT VALUE_OBJ_CLASS_SPEC {
   //  Iterations are done on the _cur hash table, since they only need to
   //  see entries visible at the start of a collection pause.
   //  All other operations are done using the _next hash table.
--- a/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -29,23 +29,14 @@
                              const char* name,
                              size_t summary_surv_rates_len) :
     _g1p(g1p), _name(name),
-    _all_regions_allocated(0),
-    _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0),
-    _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL),
-    _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0),
     _summary_surv_rates_len(summary_surv_rates_len),
     _summary_surv_rates_max_len(0),
-    _summary_surv_rates(NULL) {
-
-  // the following will set up the arrays with length 1
-  _curr_length = 1;
-  stop_adding_regions();
-  guarantee( _array_length == 1, "invariant" );
-  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
-  _surv_rate_pred[0]->add(0.4);
-  all_surviving_words_recorded(false);
-  _curr_length = 0;
-
+    _summary_surv_rates(NULL),
+    _surv_rate(NULL),
+    _accum_surv_rate_pred(NULL),
+    _surv_rate_pred(NULL)
+{
+  reset();
   if (summary_surv_rates_len > 0) {
     size_t length = summary_surv_rates_len;
       _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
@@ -60,61 +51,80 @@
   start_adding_regions();
 }
 
+
+void SurvRateGroup::reset()
+{
+  _all_regions_allocated = 0;
+  _scan_only_prefix      = 0;
+  _setup_seq_num         = 0;
+  _stats_arrays_length   = 0;
+  _accum_surv_rate       = 0.0;
+  _last_pred             = 0.0;
+  // the following will set up the arrays with length 1
+  _region_num            = 1;
+  stop_adding_regions();
+  guarantee( _stats_arrays_length == 1, "invariant" );
+  guarantee( _surv_rate_pred[0] != NULL, "invariant" );
+  _surv_rate_pred[0]->add(0.4);
+  all_surviving_words_recorded(false);
+  _region_num = 0;
+}
+
+
 void
 SurvRateGroup::start_adding_regions() {
-  _setup_seq_num   = _array_length;
-  _curr_length     = _scan_only_prefix;
+  _setup_seq_num   = _stats_arrays_length;
+  _region_num      = _scan_only_prefix;
   _accum_surv_rate = 0.0;
 
 #if 0
-  gclog_or_tty->print_cr("start adding regions, seq num %d, length %d",
-                         _setup_seq_num, _curr_length);
+  gclog_or_tty->print_cr("[%s] start adding regions, seq num %d, length %d",
+                         _name, _setup_seq_num, _region_num);
 #endif // 0
 }
 
 void
 SurvRateGroup::stop_adding_regions() {
-  size_t length = _curr_length;
 
 #if 0
-  gclog_or_tty->print_cr("stop adding regions, length %d", length);
+  gclog_or_tty->print_cr("[%s] stop adding regions, length %d", _name, _region_num);
 #endif // 0
 
-  if (length > _array_length) {
+  if (_region_num > _stats_arrays_length) {
     double* old_surv_rate = _surv_rate;
     double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
     TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
 
-    _surv_rate = NEW_C_HEAP_ARRAY(double, length);
+    _surv_rate = NEW_C_HEAP_ARRAY(double, _region_num);
     if (_surv_rate == NULL) {
-      vm_exit_out_of_memory(sizeof(double) * length,
+      vm_exit_out_of_memory(sizeof(double) * _region_num,
                             "Not enough space for surv rate array.");
     }
-    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length);
+    _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, _region_num);
     if (_accum_surv_rate_pred == NULL) {
-      vm_exit_out_of_memory(sizeof(double) * length,
+      vm_exit_out_of_memory(sizeof(double) * _region_num,
                          "Not enough space for accum surv rate pred array.");
     }
-    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length);
+    _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, _region_num);
     if (_surv_rate == NULL) {
-      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length,
+      vm_exit_out_of_memory(sizeof(TruncatedSeq*) * _region_num,
                             "Not enough space for surv rate pred array.");
     }
 
-    for (size_t i = 0; i < _array_length; ++i)
+    for (size_t i = 0; i < _stats_arrays_length; ++i)
       _surv_rate_pred[i] = old_surv_rate_pred[i];
 
 #if 0
-    gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d",
-                  _array_length, length - 1);
+    gclog_or_tty->print_cr("[%s] stop adding regions, new seqs %d to %d",
+                  _name, _stats_arrays_length, _region_num - 1);
 #endif // 0
 
-    for (size_t i = _array_length; i < length; ++i) {
+    for (size_t i = _stats_arrays_length; i < _region_num; ++i) {
       _surv_rate_pred[i] = new TruncatedSeq(10);
       // _surv_rate_pred[i]->add(last_pred);
     }
 
-    _array_length = length;
+    _stats_arrays_length = _region_num;
 
     if (old_surv_rate != NULL)
       FREE_C_HEAP_ARRAY(double, old_surv_rate);
@@ -124,7 +134,7 @@
       FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred);
   }
 
-  for (size_t i = 0; i < _array_length; ++i)
+  for (size_t i = 0; i < _stats_arrays_length; ++i)
     _surv_rate[i] = 0.0;
 }
 
@@ -135,7 +145,7 @@
 
   double ret = _accum_surv_rate;
   if (adjustment > 0) {
-    TruncatedSeq* seq = get_seq(_curr_length+1);
+    TruncatedSeq* seq = get_seq(_region_num+1);
     double surv_rate = _g1p->get_new_prediction(seq);
     ret += surv_rate;
   }
@@ -145,23 +155,23 @@
 
 int
 SurvRateGroup::next_age_index() {
-  TruncatedSeq* seq = get_seq(_curr_length);
+  TruncatedSeq* seq = get_seq(_region_num);
   double surv_rate = _g1p->get_new_prediction(seq);
   _accum_surv_rate += surv_rate;
 
-  ++_curr_length;
+  ++_region_num;
   return (int) ++_all_regions_allocated;
 }
 
 void
 SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) {
-  guarantee( scan_only_prefix <= _curr_length, "pre-condition" );
+  guarantee( scan_only_prefix <= _region_num, "pre-condition" );
   _scan_only_prefix = scan_only_prefix;
 }
 
 void
 SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) {
-  guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length,
+  guarantee( 0 <= age_in_group && (size_t) age_in_group < _region_num,
              "pre-condition" );
   guarantee( _surv_rate[age_in_group] <= 0.00001,
              "should only update each slot once" );
@@ -178,15 +188,15 @@
 
 void
 SurvRateGroup::all_surviving_words_recorded(bool propagate) {
-  if (propagate && _curr_length > 0) { // conservative
-    double surv_rate = _surv_rate_pred[_curr_length-1]->last();
+  if (propagate && _region_num > 0) { // conservative
+    double surv_rate = _surv_rate_pred[_region_num-1]->last();
 
 #if 0
     gclog_or_tty->print_cr("propagating %1.2lf from %d to %d",
                   surv_rate, _curr_length, _array_length - 1);
 #endif // 0
 
-    for (size_t i = _curr_length; i < _array_length; ++i) {
+    for (size_t i = _region_num; i < _stats_arrays_length; ++i) {
       guarantee( _surv_rate[i] <= 0.00001,
                  "the slot should not have been updated" );
       _surv_rate_pred[i]->add(surv_rate);
@@ -195,7 +205,7 @@
 
   double accum = 0.0;
   double pred = 0.0;
-  for (size_t i = 0; i < _array_length; ++i) {
+  for (size_t i = 0; i < _stats_arrays_length; ++i) {
     pred = _g1p->get_new_prediction(_surv_rate_pred[i]);
     if (pred > 1.0) pred = 1.0;
     accum += pred;
@@ -209,8 +219,8 @@
 void
 SurvRateGroup::print() {
   gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)",
-                _name, _curr_length, _scan_only_prefix);
-  for (size_t i = 0; i < _curr_length; ++i) {
+                _name, _region_num, _scan_only_prefix);
+  for (size_t i = 0; i < _region_num; ++i) {
     gclog_or_tty->print_cr("    age %4d   surv rate %6.2lf %%   pred %6.2lf %%%s",
                   i, _surv_rate[i] * 100.0,
                   _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
--- a/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -29,7 +29,7 @@
   G1CollectorPolicy* _g1p;
   const char* _name;
 
-  size_t  _array_length;
+  size_t  _stats_arrays_length;
   double* _surv_rate;
   double* _accum_surv_rate_pred;
   double  _last_pred;
@@ -40,7 +40,7 @@
   size_t         _summary_surv_rates_max_len;
 
   int _all_regions_allocated;
-  size_t _curr_length;
+  size_t _region_num;
   size_t _scan_only_prefix;
   size_t _setup_seq_num;
 
@@ -48,6 +48,7 @@
   SurvRateGroup(G1CollectorPolicy* g1p,
                 const char* name,
                 size_t summary_surv_rates_len);
+  void reset();
   void start_adding_regions();
   void stop_adding_regions();
   void record_scan_only_prefix(size_t scan_only_prefix);
@@ -55,22 +56,21 @@
   void all_surviving_words_recorded(bool propagate);
   const char* name() { return _name; }
 
-  size_t region_num() { return _curr_length; }
+  size_t region_num() { return _region_num; }
   size_t scan_only_length() { return _scan_only_prefix; }
   double accum_surv_rate_pred(int age) {
     assert(age >= 0, "must be");
-    if ((size_t)age < _array_length)
+    if ((size_t)age < _stats_arrays_length)
       return _accum_surv_rate_pred[age];
     else {
-      double diff = (double) (age - _array_length + 1);
-      return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred;
+      double diff = (double) (age - _stats_arrays_length + 1);
+      return _accum_surv_rate_pred[_stats_arrays_length-1] + diff * _last_pred;
     }
   }
 
   double accum_surv_rate(size_t adjustment);
 
   TruncatedSeq* get_seq(size_t age) {
-    guarantee( 0 <= age, "pre-condition" );
     if (age >= _setup_seq_num) {
       guarantee( _setup_seq_num > 0, "invariant" );
       age = _setup_seq_num-1;
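
The accum_surv_rate_pred() accessor in the hunk above extrapolates past the end of the stats arrays by adding _last_pred once per additional age. Below is a minimal standalone sketch of that extrapolation (plain C++ with illustrative names, not the VM's types), just to make the arithmetic concrete:

    #include <cassert>
    #include <cstddef>
    #include <iostream>

    // Hypothetical stand-in for SurvRateGroup::accum_surv_rate_pred():
    // for ages covered by the stats arrays, return the precomputed
    // accumulated prediction; past the end, extrapolate linearly with
    // the last per-region prediction.
    double accum_surv_rate_pred(const double* accum, std::size_t stats_len,
                                double last_pred, std::size_t age) {
      assert(stats_len > 0);
      if (age < stats_len) {
        return accum[age];
      }
      double diff = (double)(age - stats_len + 1);
      return accum[stats_len - 1] + diff * last_pred;
    }

    int main() {
      // accum[i] = sum of predicted survival rates for ages 0..i
      const double accum[] = { 0.90, 1.50, 1.85 };   // stats_len == 3
      const double last_pred = 0.30;
      // Covered age: read directly.
      std::cout << accum_surv_rate_pred(accum, 3, last_pred, 1) << "\n";  // 1.50
      // Uncovered age 5: 1.85 + (5 - 3 + 1) * 0.30 = 2.75
      std::cout << accum_surv_rate_pred(accum, 3, last_pred, 5) << "\n";
      return 0;
    }
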
--- a/src/share/vm/gc_implementation/includeDB_gc_g1	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1	Sun Feb 15 20:09:02 2009 -0800
@@ -48,6 +48,7 @@
 concurrentG1Refine.cpp			space.inline.hpp
 
 concurrentG1Refine.hpp			globalDefinitions.hpp
+concurrentG1Refine.hpp			allocation.hpp
 
 concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
 concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
@@ -172,6 +173,7 @@
 g1CollectorPolicy.cpp                   g1CollectorPolicy.hpp
 g1CollectorPolicy.cpp                   heapRegionRemSet.hpp
 g1CollectorPolicy.cpp			mutexLocker.hpp
+g1CollectorPolicy.cpp			gcPolicyCounters.hpp
 
 g1CollectorPolicy.hpp                   collectorPolicy.hpp
 g1CollectorPolicy.hpp                   collectionSetChooser.hpp
@@ -228,7 +230,7 @@
 g1MMUTracker.cpp			mutexLocker.hpp
 
 g1MMUTracker.hpp			debug.hpp
-
+g1MMUTracker.hpp			allocation.hpp
 g1RemSet.cpp				bufferingOopClosure.hpp
 g1RemSet.cpp				concurrentG1Refine.hpp
 g1RemSet.cpp				concurrentG1RefineThread.hpp
@@ -272,6 +274,7 @@
 heapRegion.hpp                          watermark.hpp
 heapRegion.hpp				g1_specialized_oop_closures.hpp
 heapRegion.hpp				survRateGroup.hpp
+heapRegion.hpp				ageTable.hpp
 
 heapRegionRemSet.hpp			sparsePRT.hpp
 
--- a/src/share/vm/gc_implementation/includeDB_gc_parNew	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_parNew	Sun Feb 15 20:09:02 2009 -0800
@@ -79,6 +79,7 @@
 parNewGeneration.cpp                    sharedHeap.hpp
 parNewGeneration.cpp                    space.hpp
 parNewGeneration.cpp                    spaceDecorator.hpp
+parNewGeneration.cpp                    thread.hpp
 parNewGeneration.cpp                    workgroup.hpp
 
 parNewGeneration.hpp                    defNewGeneration.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -404,6 +404,8 @@
     if (terminator()->offer_termination()) break;
     par_scan_state()->end_term_time();
   }
+  assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
+         "Broken overflow list?");
   // Finish the last termination pause.
   par_scan_state()->end_term_time();
 }
@@ -456,6 +458,8 @@
   _is_alive_closure(this),
   _plab_stats(YoungPLABSize, PLABWeight)
 {
+  NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
+  NOT_PRODUCT(_num_par_pushes = 0;)
   _task_queues = new ObjToScanQueueSet(ParallelGCThreads);
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 
@@ -993,12 +997,19 @@
              "push forwarded object");
     }
     // Push it on one of the queues of to-be-scanned objects.
-    if (!par_scan_state->work_queue()->push(obj_to_push)) {
+    bool simulate_overflow = false;
+    NOT_PRODUCT(
+      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+        // simulate a stack overflow
+        simulate_overflow = true;
+      }
+    )
+    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
       if (Verbose && PrintGCDetails) {
         gclog_or_tty->print("queue overflow!\n");
       }
-      push_on_overflow_list(old);
+      push_on_overflow_list(old, par_scan_state);
       par_scan_state->note_overflow_push();
     }
     par_scan_state->note_push();
@@ -1110,9 +1121,16 @@
              "push forwarded object");
     }
     // Push it on one of the queues of to-be-scanned objects.
-    if (!par_scan_state->work_queue()->push(obj_to_push)) {
+    bool simulate_overflow = false;
+    NOT_PRODUCT(
+      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+        // simulate a stack overflow
+        simulate_overflow = true;
+      }
+    )
+    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
-      push_on_overflow_list(old);
+      push_on_overflow_list(old, par_scan_state);
       par_scan_state->note_overflow_push();
     }
     par_scan_state->note_push();
@@ -1135,89 +1153,190 @@
   return forward_ptr;
 }
 
-void ParNewGeneration::push_on_overflow_list(oop from_space_obj) {
-  oop cur_overflow_list = _overflow_list;
+#ifndef PRODUCT
+// It's OK to call this multi-threaded;  the worst thing
+// that can happen is that we'll get a bunch of closely
+// spaced simulated overflows, but that's OK, in fact
+// probably good as it would exercise the overflow code
+// under contention.
+bool ParNewGeneration::should_simulate_overflow() {
+  if (_overflow_counter-- <= 0) { // just being defensive
+    _overflow_counter = ParGCWorkQueueOverflowInterval;
+    return true;
+  } else {
+    return false;
+  }
+}
+#endif
+
+#define BUSY (oop(0x1aff1aff))
+void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
   // if the object has been forwarded to itself, then we cannot
   // use the klass pointer for the linked list.  Instead we have
   // to allocate an oopDesc in the C-Heap and use that for the linked list.
+  // XXX This is horribly inefficient when a promotion failure occurs
+  // and should be fixed. XXX FIX ME !!!
+#ifndef PRODUCT
+  Atomic::inc_ptr(&_num_par_pushes);
+  assert(_num_par_pushes > 0, "Tautology");
+#endif
   if (from_space_obj->forwardee() == from_space_obj) {
     oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
     listhead->forward_to(from_space_obj);
     from_space_obj = listhead;
   }
-  while (true) {
-    from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
-    oop observed_overflow_list =
+  oop observed_overflow_list = _overflow_list;
+  oop cur_overflow_list;
+  do {
+    cur_overflow_list = observed_overflow_list;
+    if (cur_overflow_list != BUSY) {
+      from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
+    } else {
+      from_space_obj->set_klass_to_list_ptr(NULL);
+    }
+    observed_overflow_list =
       (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
-    if (observed_overflow_list == cur_overflow_list) break;
-    // Otherwise...
-    cur_overflow_list = observed_overflow_list;
-  }
+  } while (cur_overflow_list != observed_overflow_list);
 }
 
+// *NOTE*: The overflow list manipulation code here and
+// in CMSCollector:: are very similar in shape,
+// except that in the CMS case we thread the objects
+// directly into the list via their mark word, and do
+// not need to deal with special cases below related
+// to chunking of object arrays and promotion failure
+// handling.
+// CR 6797058 has been filed to attempt consolidation of
+// the common code.
+// Because of the common code, if you make any changes in
+// the code below, please check the CMS version to see if
+// similar changes might be needed.
+// See CMSCollector::par_take_from_overflow_list() for
+// more extensive documentation comments.
 bool
 ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
   ObjToScanQueue* work_q = par_scan_state->work_queue();
+  assert(work_q->size() == 0, "Should first empty local work queue");
   // How many to take?
-  int objsFromOverflow = MIN2(work_q->max_elems()/4,
-                              (juint)ParGCDesiredObjsFromOverflowList);
+  size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
+                                 (size_t)ParGCDesiredObjsFromOverflowList);
 
   if (_overflow_list == NULL) return false;
 
   // Otherwise, there was something there; try claiming the list.
-  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
-
-  if (prefix == NULL) {
-    return false;
+  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+  // Trim off a prefix of at most objsFromOverflow items
+  Thread* tid = Thread::current();
+  size_t spin_count = (size_t)ParallelGCThreads;
+  size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
+  for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
+    // someone grabbed it before we did ...
+    // ... we spin for a short while...
+    os::sleep(tid, sleep_time_millis, false);
+    if (_overflow_list == NULL) {
+      // nothing left to take
+      return false;
+    } else if (_overflow_list != BUSY) {
+     // try and grab the prefix
+     prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+    }
   }
-  // Trim off a prefix of at most objsFromOverflow items
-  int i = 1;
+  if (prefix == NULL || prefix == BUSY) {
+     // Nothing to take or waited long enough
+     if (prefix == NULL) {
+       // Write back the NULL in case we overwrote it with BUSY above
+       // and it is still the same value.
+       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+     }
+     return false;
+  }
+  assert(prefix != NULL && prefix != BUSY, "Error");
+  size_t i = 1;
   oop cur = prefix;
   while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
     i++; cur = oop(cur->klass());
   }
 
   // Reattach remaining (suffix) to overflow list
-  if (cur->klass_or_null() != NULL) {
-    oop suffix = oop(cur->klass());
-    cur->set_klass_to_list_ptr(NULL);
-
-    // Find last item of suffix list
-    oop last = suffix;
-    while (last->klass_or_null() != NULL) {
-      last = oop(last->klass());
+  if (cur->klass_or_null() == NULL) {
+    // Write back the NULL in lieu of the BUSY we wrote
+    // above, if it is still the same value.
+    if (_overflow_list == BUSY) {
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
     }
-    // Atomically prepend suffix to current overflow list
-    oop cur_overflow_list = _overflow_list;
-    while (true) {
-      last->set_klass_to_list_ptr(cur_overflow_list);
-      oop observed_overflow_list =
-        (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
-      if (observed_overflow_list == cur_overflow_list) break;
-      // Otherwise...
-      cur_overflow_list = observed_overflow_list;
+  } else {
+    assert(cur->klass_or_null() != BUSY, "Error");
+    oop suffix = oop(cur->klass());       // suffix will be put back on global list
+    cur->set_klass_to_list_ptr(NULL);     // break off suffix
+    // It's possible that the list is still in the empty(busy) state
+    // we left it in a short while ago; in that case we may be
+    // able to place back the suffix.
+    oop observed_overflow_list = _overflow_list;
+    oop cur_overflow_list = observed_overflow_list;
+    bool attached = false;
+    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
+      observed_overflow_list =
+        (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      if (cur_overflow_list == observed_overflow_list) {
+        attached = true;
+        break;
+      } else cur_overflow_list = observed_overflow_list;
+    }
+    if (!attached) {
+      // Too bad, someone else got in in between; we'll need to do a splice.
+      // Find the last item of suffix list
+      oop last = suffix;
+      while (last->klass_or_null() != NULL) {
+        last = oop(last->klass());
+      }
+      // Atomically prepend suffix to current overflow list
+      observed_overflow_list = _overflow_list;
+      do {
+        cur_overflow_list = observed_overflow_list;
+        if (cur_overflow_list != BUSY) {
+          // Do the splice ...
+          last->set_klass_to_list_ptr(cur_overflow_list);
+        } else { // cur_overflow_list == BUSY
+          last->set_klass_to_list_ptr(NULL);
+        }
+        observed_overflow_list =
+          (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      } while (cur_overflow_list != observed_overflow_list);
     }
   }
 
   // Push objects on prefix list onto this thread's work queue
-  assert(cur != NULL, "program logic");
+  assert(prefix != NULL && prefix != BUSY, "program logic");
   cur = prefix;
-  int n = 0;
+  ssize_t n = 0;
   while (cur != NULL) {
     oop obj_to_push = cur->forwardee();
     oop next        = oop(cur->klass_or_null());
     cur->set_klass(obj_to_push->klass());
-    if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
-      obj_to_push = cur;
+    // This may be an array object that is self-forwarded. In that case, the list pointer
+    // space, cur, is not in the Java heap, but rather in the C-heap and should be freed.
+    if (!is_in_reserved(cur)) {
+      // This can become a scaling bottleneck when there is work queue overflow coincident
+      // with promotion failure.
+      oopDesc* f = cur;
+      FREE_C_HEAP_ARRAY(oopDesc, f);
+    } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
       assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
+      obj_to_push = cur;
     }
-    work_q->push(obj_to_push);
+    bool ok = work_q->push(obj_to_push);
+    assert(ok, "Should have succeeded");
     cur = next;
     n++;
   }
   par_scan_state->note_overflow_refill(n);
+#ifndef PRODUCT
+  assert(_num_par_pushes >= n, "Too many pops?");
+  Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
+#endif
   return true;
 }
+#undef BUSY
 
 void ParNewGeneration::ref_processor_init()
 {
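
The push_on_overflow_list()/take_from_overflow_list() changes above introduce the BUSY sentinel protocol described in the comments: producers CAS-prepend and treat BUSY as an empty list, while a consumer claims the whole list by swapping in BUSY, trims off a prefix, and splices the remainder back. The following is a simplified, self-contained sketch of that protocol using std::atomic and an explicit next pointer instead of threading the list through klass/mark words; the sleep-based backoff is reduced to a bounded yield loop and the fast re-attach path is omitted, so treat it as an illustration of the idea rather than the VM code:

    #include <atomic>
    #include <cstddef>
    #include <thread>

    struct Node { Node* next; };

    // Sentinel meaning "the list is temporarily claimed by a consumer".
    static Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);

    static std::atomic<Node*> overflow_list{nullptr};

    // Producer side: CAS-prepend one node, treating BUSY as an empty list.
    void push_on_overflow_list(Node* n) {
      Node* observed = overflow_list.load();
      Node* cur;
      do {
        cur = observed;
        n->next = (cur == BUSY) ? nullptr : cur;
        // On failure, compare_exchange_weak reloads 'observed' for the retry.
      } while (!overflow_list.compare_exchange_weak(observed, n));
    }

    // Consumer side: claim the whole list by swapping in BUSY, keep a prefix
    // of at most max_items nodes, and splice the remainder back on the list.
    Node* take_prefix_from_overflow_list(std::size_t max_items) {
      Node* prefix = overflow_list.exchange(BUSY);
      for (int spin = 0; prefix == BUSY && spin < 8; ++spin) {
        // Another consumer holds the claim; back off briefly and retry.
        std::this_thread::yield();
        if (overflow_list.load() == nullptr) {
          return nullptr;                                // nothing left to take
        } else if (overflow_list.load() != BUSY) {
          prefix = overflow_list.exchange(BUSY);         // try to claim again
        }
      }
      if (prefix == nullptr || prefix == BUSY) {
        if (prefix == nullptr) {
          // We swapped BUSY over an empty list; put the nullptr back if it
          // is still our BUSY sitting there.
          Node* expected = BUSY;
          overflow_list.compare_exchange_strong(expected, nullptr);
        }
        return nullptr;
      }
      // Walk at most max_items nodes to find the split point.
      Node* cur = prefix;
      std::size_t i = 1;
      while (i < max_items && cur->next != nullptr) {
        cur = cur->next;
        ++i;
      }
      Node* suffix = cur->next;
      cur->next = nullptr;                               // break off the prefix
      if (suffix == nullptr) {
        // Nothing to put back; restore nullptr if the list still reads BUSY.
        Node* expected = BUSY;
        overflow_list.compare_exchange_strong(expected, nullptr);
      } else {
        // Splice the suffix back, again treating BUSY as an empty list.
        Node* last = suffix;
        while (last->next != nullptr) last = last->next;
        Node* observed = overflow_list.load();
        Node* cur_list;
        do {
          cur_list = observed;
          last->next = (cur_list == BUSY) ? nullptr : cur_list;
        } while (!overflow_list.compare_exchange_weak(observed, suffix));
      }
      return prefix;                                     // caller scans these nodes
    }

    int main() {
      Node nodes[5] = {};
      for (Node& n : nodes) push_on_overflow_list(&n);
      Node* p = take_prefix_from_overflow_list(3);       // claim at most 3 nodes
      int taken = 0;
      for (Node* c = p; c != nullptr; c = c->next) taken++;
      return taken == 3 ? 0 : 1;                         // the other 2 were spliced back
    }

The point of the BUSY value is that producers never block: while a consumer holds the claim they simply start a fresh list, which the consumer later finds and either leaves in place or splices its suffix onto.
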
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -278,6 +278,7 @@
   friend class ParNewRefProcTask;
   friend class ParNewRefProcTaskExecutor;
   friend class ParScanThreadStateSet;
+  friend class ParEvacuateFollowersClosure;
 
  private:
   // XXX use a global constant instead of 64!
@@ -296,6 +297,7 @@
   // klass-pointers (klass information already copied to the forwarded
   // image.)  Manipulated with CAS.
   oop _overflow_list;
+  NOT_PRODUCT(ssize_t _num_par_pushes;)
 
   // If true, older generation does not support promotion undo, so avoid.
   static bool _avoid_promotion_undo;
@@ -372,8 +374,12 @@
   oop copy_to_survivor_space_with_undo(ParScanThreadState* par_scan_state,
                              oop obj, size_t obj_sz, markOop m);
 
+  // in support of testing overflow code
+  NOT_PRODUCT(int _overflow_counter;)
+  NOT_PRODUCT(bool should_simulate_overflow();)
+
   // Push the given (from-space) object on the global overflow list.
-  void push_on_overflow_list(oop from_space_obj);
+  void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state);
 
   // If the global overflow list is non-empty, move some tasks from it
   // onto "work_q" (which must be empty).  No more than 1/4 of the
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -362,6 +362,10 @@
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
+
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
 }
 
 bool PSMarkSweep::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -116,7 +116,7 @@
   // ObjectSpace stuff
   //
 
-  _object_space = new MutableSpace();
+  _object_space = new MutableSpace(virtual_space()->alignment());
 
   if (_object_space == NULL)
     vm_exit_during_initialization("Could not allocate an old gen space");
@@ -385,10 +385,10 @@
   start_array()->set_covered_region(new_memregion);
   Universe::heap()->barrier_set()->resize_covered_region(new_memregion);
 
-  HeapWord* const virtual_space_high = (HeapWord*) virtual_space()->high();
-
   // ALWAYS do this last!!
-  object_space()->set_end(virtual_space_high);
+  object_space()->initialize(new_memregion,
+                             SpaceDecorator::DontClear,
+                             SpaceDecorator::DontMangle);
 
   assert(new_word_size == heap_word_size(object_space()->capacity_in_bytes()),
     "Sanity");
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -2203,6 +2203,10 @@
                            collection_exit.ticks());
     gc_task_manager()->print_task_time_stamps();
   }
+
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
 }
 
 bool PSParallelCompact::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -615,6 +615,10 @@
     gc_task_manager()->print_task_time_stamps();
   }
 
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
+
   return !promotion_failure_occurred;
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -78,7 +78,7 @@
   _special = false;
 }
 
-bool PSVirtualSpace::expand_by(size_t bytes, bool pre_touch) {
+bool PSVirtualSpace::expand_by(size_t bytes) {
   assert(is_aligned(bytes), "arg not aligned");
   DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
 
@@ -92,15 +92,6 @@
     _committed_high_addr += bytes;
   }
 
-  if (pre_touch || AlwaysPreTouch) {
-    for (char* curr = base_addr;
-         curr < _committed_high_addr;
-         curr += os::vm_page_size()) {
-      char tmp = *curr;
-      *curr = 0;
-    }
-  }
-
   return result;
 }
 
@@ -255,7 +246,7 @@
   DEBUG_ONLY(verify());
 }
 
-bool PSVirtualSpaceHighToLow::expand_by(size_t bytes, bool pre_touch) {
+bool PSVirtualSpaceHighToLow::expand_by(size_t bytes) {
   assert(is_aligned(bytes), "arg not aligned");
   DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
 
@@ -269,15 +260,6 @@
     _committed_low_addr -= bytes;
   }
 
-  if (pre_touch || AlwaysPreTouch) {
-    for (char* curr = base_addr;
-         curr < _committed_high_addr;
-         curr += os::vm_page_size()) {
-      char tmp = *curr;
-      *curr = 0;
-    }
-  }
-
   return result;
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -80,7 +80,7 @@
   inline  void   set_reserved(char* low_addr, char* high_addr, bool special);
   inline  void   set_reserved(ReservedSpace rs);
   inline  void   set_committed(char* low_addr, char* high_addr);
-  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
+  virtual bool   expand_by(size_t bytes);
   virtual bool   shrink_by(size_t bytes);
   virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
   void           release();
@@ -127,7 +127,7 @@
   PSVirtualSpaceHighToLow(ReservedSpace rs, size_t alignment);
   PSVirtualSpaceHighToLow(ReservedSpace rs);
 
-  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
+  virtual bool   expand_by(size_t bytes);
   virtual bool   shrink_by(size_t bytes);
   virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -64,12 +64,12 @@
   }
 
   if (UseNUMA) {
-    _eden_space = new MutableNUMASpace();
+    _eden_space = new MutableNUMASpace(virtual_space()->alignment());
   } else {
-    _eden_space = new MutableSpace();
+    _eden_space = new MutableSpace(virtual_space()->alignment());
   }
-  _from_space = new MutableSpace();
-  _to_space   = new MutableSpace();
+  _from_space = new MutableSpace(virtual_space()->alignment());
+  _to_space   = new MutableSpace(virtual_space()->alignment());
 
   if (_eden_space == NULL || _from_space == NULL || _to_space == NULL) {
     vm_exit_during_initialization("Could not allocate a young gen space");
--- a/src/share/vm/gc_implementation/shared/ageTable.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/ageTable.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -67,6 +67,12 @@
   }
 }
 
+void ageTable::merge_par(ageTable* subTable) {
+  for (int i = 0; i < table_size; i++) {
+    Atomic::add_ptr(subTable->sizes[i], &sizes[i]);
+  }
+}
+
 int ageTable::compute_tenuring_threshold(size_t survivor_capacity) {
   size_t desired_survivor_size = (size_t)((((double) survivor_capacity)*TargetSurvivorRatio)/100);
   size_t total = 0;
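
The new merge_par() above folds a per-thread age table into the shared one with one atomic add per bucket, which is what makes it safe to call from several GC workers at once. A minimal sketch of that pattern with std::atomic counters (toy table size and names, not the VM's ageTable):

    #include <atomic>
    #include <cstddef>

    const int table_size = 16;                 // stand-in for the VM's age table size

    struct SharedAgeTable {
      std::atomic<std::size_t> sizes[table_size];   // words per age, shared
    };

    struct LocalAgeTable {
      std::size_t sizes[table_size] = {};           // per-GC-thread, no atomics needed
      void add(int age, std::size_t words) { sizes[age] += words; }
    };

    // Parallel merge: each worker may call this concurrently because every
    // bucket is folded in with an atomic add (the merge_par() idea above).
    void merge_par(SharedAgeTable& shared, const LocalAgeTable& local) {
      for (int i = 0; i < table_size; i++) {
        shared.sizes[i].fetch_add(local.sizes[i], std::memory_order_relaxed);
      }
    }

    int main() {
      SharedAgeTable shared;
      for (auto& s : shared.sizes) s.store(0);   // explicit zeroing of the atomics
      LocalAgeTable worker;
      worker.add(1, 64);                         // a 64-word object of age 1
      merge_par(shared, worker);
      return shared.sizes[1].load() == 64 ? 0 : 1;
    }
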
--- a/src/share/vm/gc_implementation/shared/ageTable.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/ageTable.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -56,6 +56,7 @@
   // Merge another age table with the current one.  Used
   // for parallel young generation gc.
   void merge(ageTable* subTable);
+  void merge_par(ageTable* subTable);
 
   // calculate new tenuring threshold based on age information
   int compute_tenuring_threshold(size_t survivor_capacity);
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -27,7 +27,7 @@
 # include "incls/_mutableNUMASpace.cpp.incl"
 
 
-MutableNUMASpace::MutableNUMASpace() {
+MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment) {
   _lgrp_spaces = new (ResourceObj::C_HEAP) GrowableArray<LGRPSpace*>(0, true);
   _page_size = os::vm_page_size();
   _adaptation_cycles = 0;
@@ -221,7 +221,7 @@
         }
       }
       if (!found) {
-        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i]));
+        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i], alignment()));
       }
     }
 
@@ -443,10 +443,10 @@
   // Is there bottom?
   if (new_region.start() < intersection.start()) { // Yes
     // Try to coalesce small pages into a large one.
-    if (UseLargePages && page_size() >= os::large_page_size()) {
-      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), os::large_page_size());
+    if (UseLargePages && page_size() >= alignment()) {
+      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), alignment());
       if (new_region.contains(p)
-          && pointer_delta(p, new_region.start(), sizeof(char)) >= os::large_page_size()) {
+          && pointer_delta(p, new_region.start(), sizeof(char)) >= alignment()) {
         if (intersection.contains(p)) {
           intersection = MemRegion(p, intersection.end());
         } else {
@@ -462,10 +462,10 @@
   // Is there top?
   if (intersection.end() < new_region.end()) { // Yes
     // Try to coalesce small pages into a large one.
-    if (UseLargePages && page_size() >= os::large_page_size()) {
-      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), os::large_page_size());
+    if (UseLargePages && page_size() >= alignment()) {
+      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), alignment());
       if (new_region.contains(p)
-          && pointer_delta(new_region.end(), p, sizeof(char)) >= os::large_page_size()) {
+          && pointer_delta(new_region.end(), p, sizeof(char)) >= alignment()) {
         if (intersection.contains(p)) {
           intersection = MemRegion(intersection.start(), p);
         } else {
@@ -504,12 +504,12 @@
             // That's the only case we have to make an additional bias_region() call.
             HeapWord* start = invalid_region->start();
             HeapWord* end = invalid_region->end();
-            if (UseLargePages && page_size() >= os::large_page_size()) {
-              HeapWord *p = (HeapWord*)round_down((intptr_t) start, os::large_page_size());
+            if (UseLargePages && page_size() >= alignment()) {
+              HeapWord *p = (HeapWord*)round_down((intptr_t) start, alignment());
               if (new_region.contains(p)) {
                 start = p;
               }
-              p = (HeapWord*)round_to((intptr_t) end, os::large_page_size());
+              p = (HeapWord*)round_to((intptr_t) end, alignment());
               if (new_region.contains(end)) {
                 end = p;
               }
@@ -526,7 +526,8 @@
 
 void MutableNUMASpace::initialize(MemRegion mr,
                                   bool clear_space,
-                                  bool mangle_space) {
+                                  bool mangle_space,
+                                  bool setup_pages) {
   assert(clear_space, "Reallocation will destory data!");
   assert(lgrp_spaces()->length() > 0, "There should be at least one space");
 
@@ -538,7 +539,7 @@
 
   // Compute chunk sizes
   size_t prev_page_size = page_size();
-  set_page_size(UseLargePages ? os::large_page_size() : os::vm_page_size());
+  set_page_size(UseLargePages ? alignment() : os::vm_page_size());
   HeapWord* rounded_bottom = (HeapWord*)round_to((intptr_t) bottom(), page_size());
   HeapWord* rounded_end = (HeapWord*)round_down((intptr_t) end(), page_size());
   size_t base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();
@@ -666,7 +667,7 @@
     }
 
     // Clear space (set top = bottom) but never mangle.
-    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle);
+    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle, MutableSpace::DontSetupPages);
 
     set_adaptation_cycles(samples_count());
   }
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -82,8 +82,8 @@
     char* last_page_scanned()            { return _last_page_scanned; }
     void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
    public:
-    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
-      _space = new MutableSpace();
+    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
+      _space = new MutableSpace(alignment);
       _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
     }
     ~LGRPSpace() {
@@ -183,10 +183,10 @@
 
  public:
   GrowableArray<LGRPSpace*>* lgrp_spaces() const     { return _lgrp_spaces;       }
-  MutableNUMASpace();
+  MutableNUMASpace(size_t alignment);
   virtual ~MutableNUMASpace();
   // Space initialization.
-  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
   // Update space layout if necessary. Do all adaptive resizing job.
   virtual void update();
   // Update allocation rate averages.
--- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -25,7 +25,10 @@
 # include "incls/_precompiled.incl"
 # include "incls/_mutableSpace.cpp.incl"
 
-MutableSpace::MutableSpace(): ImmutableSpace(), _top(NULL) {
+MutableSpace::MutableSpace(size_t alignment): ImmutableSpace(), _top(NULL), _alignment(alignment) {
+  assert(MutableSpace::alignment() >= 0 &&
+         MutableSpace::alignment() % os::vm_page_size() == 0,
+         "Space should be aligned");
   _mangler = new MutableSpaceMangler(this);
 }
 
@@ -33,16 +36,88 @@
   delete _mangler;
 }
 
+void MutableSpace::numa_setup_pages(MemRegion mr, bool clear_space) {
+  if (!mr.is_empty()) {
+    size_t page_size = UseLargePages ? alignment() : os::vm_page_size();
+    HeapWord *start = (HeapWord*)round_to((intptr_t) mr.start(), page_size);
+    HeapWord *end =  (HeapWord*)round_down((intptr_t) mr.end(), page_size);
+    if (end > start) {
+      size_t size = pointer_delta(end, start, sizeof(char));
+      if (clear_space) {
+        // Prefer page reallocation to migration.
+        os::free_memory((char*)start, size);
+      }
+      os::numa_make_global((char*)start, size);
+    }
+  }
+}
+
+void MutableSpace::pretouch_pages(MemRegion mr) {
+  for (volatile char *p = (char*)mr.start(); p < (char*)mr.end(); p += os::vm_page_size()) {
+    char t = *p; *p = t;
+  }
+}
+
 void MutableSpace::initialize(MemRegion mr,
                               bool clear_space,
-                              bool mangle_space) {
-  HeapWord* bottom = mr.start();
-  HeapWord* end    = mr.end();
+                              bool mangle_space,
+                              bool setup_pages) {
+
+  assert(Universe::on_page_boundary(mr.start()) && Universe::on_page_boundary(mr.end()),
+         "invalid space boundaries");
 
-  assert(Universe::on_page_boundary(bottom) && Universe::on_page_boundary(end),
-         "invalid space boundaries");
-  set_bottom(bottom);
-  set_end(end);
+  if (setup_pages && (UseNUMA || AlwaysPreTouch)) {
+    // The space may move left and right or expand/shrink.
+    // We'd like to enforce the desired page placement.
+    MemRegion head, tail;
+    if (last_setup_region().is_empty()) {
+      // If it's the first initialization don't limit the amount of work.
+      head = mr;
+      tail = MemRegion(mr.end(), mr.end());
+    } else {
+      // Is there an intersection with the address space?
+      MemRegion intersection = last_setup_region().intersection(mr);
+      if (intersection.is_empty()) {
+        intersection = MemRegion(mr.end(), mr.end());
+      }
+      // All the sizes below are in words.
+      size_t head_size = 0, tail_size = 0;
+      if (mr.start() <= intersection.start()) {
+        head_size = pointer_delta(intersection.start(), mr.start());
+      }
+      if(intersection.end() <= mr.end()) {
+        tail_size = pointer_delta(mr.end(), intersection.end());
+      }
+      // Limit the amount of page manipulation if necessary.
+      if (NUMASpaceResizeRate > 0 && !AlwaysPreTouch) {
+        const size_t change_size = head_size + tail_size;
+        const float setup_rate_words = NUMASpaceResizeRate >> LogBytesPerWord;
+        head_size = MIN2((size_t)(setup_rate_words * head_size / change_size),
+                         head_size);
+        tail_size = MIN2((size_t)(setup_rate_words * tail_size / change_size),
+                         tail_size);
+      }
+      head = MemRegion(intersection.start() - head_size, intersection.start());
+      tail = MemRegion(intersection.end(), intersection.end() + tail_size);
+    }
+    assert(mr.contains(head) && mr.contains(tail), "Sanity");
+
+    if (UseNUMA) {
+      numa_setup_pages(head, clear_space);
+      numa_setup_pages(tail, clear_space);
+    }
+
+    if (AlwaysPreTouch) {
+      pretouch_pages(head);
+      pretouch_pages(tail);
+    }
+
+    // Remember where we stopped so that we can continue later.
+    set_last_setup_region(MemRegion(head.start(), tail.end()));
+  }
+
+  set_bottom(mr.start());
+  set_end(mr.end());
 
   if (clear_space) {
     clear(mangle_space);
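
The initialize() logic above limits how much page setup work is done per resize via NUMASpaceResizeRate, splitting the budget between the head and tail regions in proportion to their sizes. A small sketch of that rate-limiting arithmetic (plain C++; LogBytesPerWord of 3 is an LP64 assumption, and the names are illustrative):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Bytes per HeapWord on an LP64 VM (assumption for this sketch).
    const std::size_t LogBytesPerWord = 3;

    // Given the head and tail sizes (in words) of the region that needs page
    // setup, scale them down so their sum does not exceed the per-resize
    // budget NUMASpaceResizeRate (in bytes), preserving their ratio.
    void limit_page_setup(std::size_t& head_size, std::size_t& tail_size,
                          std::size_t numa_space_resize_rate_bytes) {
      const std::size_t change_size = head_size + tail_size;
      if (change_size == 0) return;
      const double setup_rate_words =
          (double)(numa_space_resize_rate_bytes >> LogBytesPerWord);
      head_size = std::min((std::size_t)(setup_rate_words * head_size / change_size),
                           head_size);
      tail_size = std::min((std::size_t)(setup_rate_words * tail_size / change_size),
                           tail_size);
    }

    int main() {
      // Example: 3M words of new head, 1M words of new tail,
      // with an 8MB (== 1M-word) per-resize budget.
      std::size_t head = 3 * 1024 * 1024, tail = 1 * 1024 * 1024;
      limit_page_setup(head, tail, 8 * 1024 * 1024);
      // Budget of 1M words split 3:1 -> head 786432 words, tail 262144 words.
      std::cout << head << " " << tail << "\n";
      return 0;
    }
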
--- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -25,7 +25,10 @@
 // A MutableSpace is a subtype of ImmutableSpace that supports the
 // concept of allocation. This includes the concepts that a space may
 // be only partially full, and the querry methods that go with such
-// an assumption.
+// an assumption. MutableSpace is also responsible for minimizing the
+// page allocation time by having the memory pretouched (with
+// AlwaysPreTouch) and for optimizing page placement on NUMA systems
+// by making the underlying region interleaved (with UseNUMA).
 //
 // Invariant: (ImmutableSpace +) bottom() <= top() <= end()
 // top() is inclusive and end() is exclusive.
@@ -37,15 +40,23 @@
 
   // Helper for mangling unused space in debug builds
   MutableSpaceMangler* _mangler;
-
+  // The last region whose pages have been set up to be interleaved.
+  MemRegion _last_setup_region;
+  size_t _alignment;
  protected:
   HeapWord* _top;
 
   MutableSpaceMangler* mangler() { return _mangler; }
 
+  void numa_setup_pages(MemRegion mr, bool clear_space);
+  void pretouch_pages(MemRegion mr);
+
+  void set_last_setup_region(MemRegion mr) { _last_setup_region = mr;   }
+  MemRegion last_setup_region() const      { return _last_setup_region; }
+
  public:
   virtual ~MutableSpace();
-  MutableSpace();
+  MutableSpace(size_t page_size);
 
   // Accessors
   HeapWord* top() const                    { return _top;    }
@@ -57,13 +68,20 @@
   virtual void set_bottom(HeapWord* value) { _bottom = value; }
   virtual void set_end(HeapWord* value)    { _end = value; }
 
+  size_t alignment()                       { return _alignment; }
+
   // Returns a subregion containing all objects in this space.
   MemRegion used_region() { return MemRegion(bottom(), top()); }
 
+  static const bool SetupPages = true;
+  static const bool DontSetupPages = false;
+
   // Initialization
   virtual void initialize(MemRegion mr,
                           bool clear_space,
-                          bool mangle_space);
+                          bool mangle_space,
+                          bool setup_pages = SetupPages);
+
   virtual void clear(bool mangle_space);
   // Does the usual initialization but optionally resets top to bottom.
 #if 0  // MANGLE_SPACE
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -42,6 +42,7 @@
 class CollectedHeap : public CHeapObj {
   friend class VMStructs;
   friend class IsGCActiveMark; // Block structured external access to _is_gc_active
+  friend class constantPoolCacheKlass; // allocate() method inserts is_conc_safe
 
 #ifdef ASSERT
   static int       _fire_out_of_memory_count;
@@ -82,8 +83,6 @@
   // Reinitialize tlabs before resuming mutators.
   virtual void resize_all_tlabs();
 
-  debug_only(static void check_for_valid_allocation_state();)
-
  protected:
   // Allocate from the current thread's TLAB, with broken-out slow path.
   inline static HeapWord* allocate_from_tlab(Thread* thread, size_t size);
@@ -142,6 +141,7 @@
     PRODUCT_RETURN;
   virtual void check_for_non_bad_heap_word_value(HeapWord* addr, size_t size)
     PRODUCT_RETURN;
+  debug_only(static void check_for_valid_allocation_state();)
 
  public:
   enum Name {
--- a/src/share/vm/interpreter/rewriter.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/interpreter/rewriter.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -48,9 +48,14 @@
 
 
 // Creates a constant pool cache given an inverse_index_map
+// This creates the constant pool cache initially in a state
+// that is unsafe for concurrent GC processing but sets it to
+// a safe mode before the constant pool cache is returned.
 constantPoolCacheHandle Rewriter::new_constant_pool_cache(intArray& inverse_index_map, TRAPS) {
   const int length = inverse_index_map.length();
-  constantPoolCacheOop cache = oopFactory::new_constantPoolCache(length, CHECK_(constantPoolCacheHandle()));
+  constantPoolCacheOop cache = oopFactory::new_constantPoolCache(length,
+                                             methodOopDesc::IsUnsafeConc,
+                                             CHECK_(constantPoolCacheHandle()));
   cache->initialize(inverse_index_map);
   return constantPoolCacheHandle(THREAD, cache);
 }
--- a/src/share/vm/memory/genCollectedHeap.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -610,6 +610,10 @@
     Universe::print_heap_after_gc();
   }
 
+#ifdef TRACESPINNING
+  ParallelTaskTerminator::print_termination_counts();
+#endif
+
   if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) {
     tty->print_cr("Stopping after GC #%d", ExitAfterGCNum);
     vm_exit(-1);
--- a/src/share/vm/memory/oopFactory.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/memory/oopFactory.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -90,9 +90,11 @@
 }
 
 
-constantPoolCacheOop oopFactory::new_constantPoolCache(int length, TRAPS) {
+constantPoolCacheOop oopFactory::new_constantPoolCache(int length,
+                                                       bool is_conc_safe,
+                                                       TRAPS) {
   constantPoolCacheKlass* ck = constantPoolCacheKlass::cast(Universe::constantPoolCacheKlassObj());
-  return ck->allocate(length, CHECK_NULL);
+  return ck->allocate(length, is_conc_safe, CHECK_NULL);
 }
 
 
--- a/src/share/vm/memory/oopFactory.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/memory/oopFactory.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -84,7 +84,9 @@
   static constantPoolOop      new_constantPool     (int length,
                                                     bool is_conc_safe,
                                                     TRAPS);
-  static constantPoolCacheOop new_constantPoolCache(int length, TRAPS);
+  static constantPoolCacheOop new_constantPoolCache(int length,
+                                                    bool is_conc_safe,
+                                                    TRAPS);
 
   // Instance classes
   static klassOop        new_instanceKlass(int vtable_len, int itable_len, int static_field_size,
--- a/src/share/vm/memory/referenceProcessor.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/memory/referenceProcessor.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -721,12 +721,6 @@
                              iter.obj(), iter.obj()->blueprint()->internal_name());
     }
     assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference");
-    // If discovery is concurrent, we may have objects with null referents,
-    // being those that were concurrently cleared after they were discovered
-    // (and not subsequently precleaned).
-    assert(   (discovery_is_atomic() && iter.referent()->is_oop())
-           || (!discovery_is_atomic() && iter.referent()->is_oop_or_null(UseConcMarkSweepGC)),
-           "Adding a bad referent");
     iter.next();
   }
   // Remember to keep sentinel pointer around
--- a/src/share/vm/oops/cpCacheKlass.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/oops/cpCacheKlass.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -32,13 +32,43 @@
 }
 
 
-constantPoolCacheOop constantPoolCacheKlass::allocate(int length, TRAPS) {
+constantPoolCacheOop constantPoolCacheKlass::allocate(int length,
+                                                      bool is_conc_safe,
+                                                      TRAPS) {
   // allocate memory
   int size = constantPoolCacheOopDesc::object_size(length);
+
   KlassHandle klass (THREAD, as_klassOop());
-  constantPoolCacheOop cache = (constantPoolCacheOop)
-    CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL);
+
+  // This is the original code.  The code from permanent_obj_allocate()
+  // was in-lined to allow the setting of is_conc_safe before the klass
+  // is installed.
+  // constantPoolCacheOop cache = (constantPoolCacheOop)
+  //   CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL);
+
+  oop obj = CollectedHeap::permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
+  constantPoolCacheOop cache = (constantPoolCacheOop) obj;
+  cache->set_is_conc_safe(is_conc_safe);
+  // The store to is_conc_safe must be visible before the klass
+  // is set.  This should be done safely because _is_conc_safe has
+  // been declared volatile.  If there are any problems, consider adding
+  // OrderAccess::storestore();
+  CollectedHeap::post_allocation_install_obj_klass(klass, obj, size);
+  NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value((HeapWord*) obj,
+                                                              size));
+
+  // The length field affects the size of the object.  The allocation
+  // above allocates the correct size (see calculation of "size") but
+  // the size() method of the constant pool cache oop will not reflect
+  // that size until the correct length is set.
   cache->set_length(length);
+
+  // The store of the length must be visible before is_conc_safe is
+  // set to a safe state.
+  // This should be done safely because _is_conc_safe has
+  // been declared volatile.  If there are any problems, consider adding
+  // OrderAccess::storestore();
+  cache->set_is_conc_safe(methodOopDesc::IsSafeConc);
   cache->set_constant_pool(NULL);
   return cache;
 }
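
The comments in the allocate() hunk above describe a publication order: mark the object conc-unsafe, install the klass, set the length that size() depends on, and only then flip the flag to safe. The following is a loose sketch of that ordering using std::atomic release/acquire in place of the volatile field and the OrderAccess::storestore() the comments mention; the types are toys, not the VM's allocation path:

    #include <atomic>
    #include <cstddef>

    // A toy object whose size() depends on a length field that is only valid
    // once initialization has finished, which is the situation the
    // constantPoolCache allocation above has to handle for a concurrent GC.
    struct ToyCache {
      std::atomic<bool> is_conc_safe{false};   // may a concurrent scanner call size()?
      int length = 0;
      std::size_t size() const { return sizeof(ToyCache) + length * sizeof(void*); }
    };

    ToyCache* allocate_toy_cache(int length) {
      // 1. Raw allocation; explicitly mark the object unsafe for concurrent
      //    scanning before anything can observe it.
      ToyCache* cache = new ToyCache();
      cache->is_conc_safe.store(false, std::memory_order_release);

      // 2. Install the type information that makes the object scannable.
      //    (In the VM this is post_allocation_install_obj_klass(); here it
      //    is only a placeholder, the point is where it sits in the order.)

      // 3. Set the length that size() depends on ...
      cache->length = length;

      // 4. ... and only then publish "safe". The release store orders the
      //    length write before the flag write, so a scanner that observes
      //    is_conc_safe == true also observes the correct length.
      cache->is_conc_safe.store(true, std::memory_order_release);
      return cache;
    }

    // Concurrent scanner side: acquire-load the flag before trusting size().
    std::size_t scan_size_or_skip(const ToyCache* cache) {
      if (cache->is_conc_safe.load(std::memory_order_acquire)) {
        return cache->size();
      }
      return 0;   // not yet safe; skip it this time around
    }

    int main() {
      ToyCache* c = allocate_toy_cache(4);
      std::size_t sz = scan_size_or_skip(c);
      delete c;
      return sz == sizeof(ToyCache) + 4 * sizeof(void*) ? 0 : 1;
    }
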
@@ -114,7 +144,6 @@
   return size;
 }
 
-
 int constantPoolCacheKlass::oop_adjust_pointers(oop obj) {
   assert(obj->is_constantPoolCache(), "obj must be constant pool cache");
   constantPoolCacheOop cache = (constantPoolCacheOop)obj;
@@ -131,6 +160,11 @@
   return size;
 }
 
+bool constantPoolCacheKlass::oop_is_conc_safe(oop obj) const {
+  assert(obj->is_constantPoolCache(), "should be constant pool");
+  return constantPoolCacheOop(obj)->is_conc_safe();
+}
+
 #ifndef SERIALGC
 void constantPoolCacheKlass::oop_copy_contents(PSPromotionManager* pm,
                                                oop obj) {
--- a/src/share/vm/oops/cpCacheKlass.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/oops/cpCacheKlass.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -32,7 +32,7 @@
 
   // Allocation
   DEFINE_ALLOCATE_PERMANENT(constantPoolCacheKlass);
-  constantPoolCacheOop allocate(int length, TRAPS);
+  constantPoolCacheOop allocate(int length, bool is_conc_safe, TRAPS);
   static klassOop create_klass(TRAPS);
 
   // Casting from klassOop
@@ -48,6 +48,7 @@
   // Garbage collection
   void oop_follow_contents(oop obj);
   int oop_adjust_pointers(oop obj);
+  virtual bool oop_is_conc_safe(oop obj) const;
 
   // Parallel Scavenge and Parallel Old
   PARALLEL_GC_DECLS
--- a/src/share/vm/oops/cpCacheOop.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/oops/cpCacheOop.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -291,6 +291,9 @@
  private:
   int             _length;
   constantPoolOop _constant_pool;                // the corresponding constant pool
+  // If true, safe for concurrent GC processing,
+  // Set unconditionally in constantPoolCacheKlass::allocate()
+  volatile bool        _is_conc_safe;
 
   // Sizing
   debug_only(friend class ClassVerifier;)
@@ -316,6 +319,12 @@
   constantPoolOop constant_pool() const          { return _constant_pool; }
   ConstantPoolCacheEntry* entry_at(int i) const  { assert(0 <= i && i < length(), "index out of bounds"); return base() + i; }
 
+  // GC support
+  // If the _length field has not been set, the size of the
+  // constantPoolCache cannot be correctly calculated.
+  bool is_conc_safe()                            { return _is_conc_safe; }
+  void set_is_conc_safe(bool v)                  { _is_conc_safe = v; }
+
   // Code generation
   static ByteSize base_offset()                  { return in_ByteSize(sizeof(constantPoolCacheOopDesc)); }
 
--- a/src/share/vm/opto/graphKit.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/opto/graphKit.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -3233,12 +3233,11 @@
 
   // Now some of the values
 
-  Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
-  Node* index   = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
-  Node* buffer  = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
+  Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
 
   // if (!marking)
   __ if_then(marking, BoolTest::ne, zero); {
+    Node* index   = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
 
     const Type* t1 = adr->bottom_type();
     const Type* t2 = val->bottom_type();
@@ -3246,6 +3245,7 @@
     Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);
     // if (orig != NULL)
     __ if_then(orig, BoolTest::ne, null()); {
+      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
 
       // load original value
       // alias_idx correct??
--- a/src/share/vm/opto/memnode.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/opto/memnode.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -1303,6 +1303,7 @@
     Node*    base   = AddPNode::Ideal_base_and_offset(address, phase, ignore);
     if (base != NULL
         && phase->type(base)->higher_equal(TypePtr::NOTNULL)
+        && phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw
         && all_controls_dominate(base, phase->C->start())) {
       // A method-invariant, non-null address (constant or 'this' argument).
       set_req(MemNode::Control, NULL);
--- a/src/share/vm/runtime/globals.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/runtime/globals.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -1307,7 +1307,14 @@
   product(intx, ParGCArrayScanChunk, 50,                                    \
           "Scan a subset and push remainder, if array is bigger than this") \
                                                                             \
-  product(intx, ParGCDesiredObjsFromOverflowList, 20,                       \
+  notproduct(bool, ParGCWorkQueueOverflowALot, false,                       \
+          "Whether we should simulate work queue overflow in ParNew")       \
+                                                                            \
+  notproduct(uintx, ParGCWorkQueueOverflowInterval, 1000,                   \
+          "An `interval' counter that determines how frequently"            \
+          " we simulate overflow; a smaller number increases frequency")    \
+                                                                            \
+  product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
   product(uintx, CMSParPromoteBlocksToClaim, 50,                            \
@@ -1419,18 +1426,18 @@
   develop(bool, CMSOverflowEarlyRestoration, false,                         \
           "Whether preserved marks should be restored early")               \
                                                                             \
-  product(uintx, CMSMarkStackSize, 32*K,                                    \
+  product(uintx, CMSMarkStackSize, NOT_LP64(32*K) LP64_ONLY(4*M),           \
           "Size of CMS marking stack")                                      \
                                                                             \
-  product(uintx, CMSMarkStackSizeMax, 4*M,                                  \
+  product(uintx, CMSMarkStackSizeMax, NOT_LP64(4*M) LP64_ONLY(512*M),       \
           "Max size of CMS marking stack")                                  \
                                                                             \
   notproduct(bool, CMSMarkStackOverflowALot, false,                         \
           "Whether we should simulate frequent marking stack / work queue"  \
           " overflow")                                                      \
                                                                             \
-  notproduct(intx, CMSMarkStackOverflowInterval, 1000,                      \
-          "A per-thread `interval' counter that determines how frequently"  \
+  notproduct(uintx, CMSMarkStackOverflowInterval, 1000,                     \
+          "An `interval' counter that determines how frequently"            \
           " we simulate overflow; a smaller number increases frequency")    \
                                                                             \
   product(uintx, CMSMaxAbortablePrecleanLoops, 0,                           \
@@ -1648,7 +1655,14 @@
   develop(uintx, WorkStealingYieldsBeforeSleep, 1000,                       \
           "Number of yields before a sleep is done during workstealing")    \
                                                                             \
-  product(uintx, PreserveMarkStackSize, 40,                                 \
+  develop(uintx, WorkStealingHardSpins, 4096,                               \
+          "Number of iterations in a spin loop between checks on "          \
+          "time out of hard spin")                                          \
+                                                                            \
+  develop(uintx, WorkStealingSpinToYieldRatio, 10,                          \
+          "Ratio of hard spins to calls to yield")                          \
+                                                                            \
+  product(uintx, PreserveMarkStackSize, 1024,                               \
            "Size for stack used in promotion failure handling")             \
                                                                             \
   product_pd(bool, UseTLAB, "Use thread-local object allocation")           \
--- a/src/share/vm/runtime/os.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/runtime/os.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -74,13 +74,11 @@
   const int milliseconds_after_second =
     milliseconds_since_19700101 % milliseconds_per_microsecond;
   // Convert the time value to a tm and timezone variable
-  const struct tm *time_struct_temp = localtime(&seconds_since_19700101);
-  if (time_struct_temp == NULL) {
-    assert(false, "Failed localtime");
+  struct tm time_struct;
+  if (localtime_pd(&seconds_since_19700101, &time_struct) == NULL) {
+    assert(false, "Failed localtime_pd");
     return NULL;
   }
-  // Save the results of localtime
-  const struct tm time_struct = *time_struct_temp;
   const time_t zone = timezone;
 
   // If daylight savings time is in effect,
@@ -93,10 +91,10 @@
     UTC_to_local = UTC_to_local - seconds_per_hour;
   }
   // Compute the time zone offset.
-  //    localtime(3C) sets timezone to the difference (in seconds)
+  //    localtime_pd() sets timezone to the difference (in seconds)
   //    between UTC and local time.
   //    ISO 8601 says we need the difference between local time and UTC,
-  //    we change the sign of the localtime(3C) result.
+  //    we change the sign of the localtime_pd() result.
   const time_t local_to_UTC = -(UTC_to_local);
   // Then we have to figure out if we are ahead (+) or behind (-) UTC.
   char sign_local_to_UTC = '+';
--- a/src/share/vm/runtime/os.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/runtime/os.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -120,7 +120,8 @@
   // Return current local time in a string (YYYY-MM-DD HH:MM:SS).
   // It is MT safe, but not async-safe, as reading time zone
   // information may require a lock on some platforms.
-  static char* local_time_string(char *buf, size_t buflen);
+  static char*      local_time_string(char *buf, size_t buflen);
+  static struct tm* localtime_pd     (const time_t* clock, struct tm*  res);
   // Fill in buffer with current local time as an ISO-8601 string.
   // E.g., YYYY-MM-DDThh:mm:ss.mmm+zzzz.
   // Returns buffer, or NULL if it failed.
--- a/src/share/vm/utilities/taskqueue.cpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/utilities/taskqueue.cpp	Sun Feb 15 20:09:02 2009 -0800
@@ -25,6 +25,12 @@
 # include "incls/_precompiled.incl"
 # include "incls/_taskqueue.cpp.incl"
 
+#ifdef TRACESPINNING
+uint ParallelTaskTerminator::_total_yields = 0;
+uint ParallelTaskTerminator::_total_spins = 0;
+uint ParallelTaskTerminator::_total_peeks = 0;
+#endif
+
 bool TaskQueueSuper::peek() {
   return _bottom != _age.top();
 }
@@ -69,15 +75,62 @@
 ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
   Atomic::inc(&_offered_termination);
 
-  juint yield_count = 0;
+  uint yield_count = 0;
+  // Number of hard spin loops done since last yield
+  uint hard_spin_count = 0;
+  // Number of iterations in the hard spin loop.
+  uint hard_spin_limit = WorkStealingHardSpins;
+
+  // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
+  // If it is greater than 0, then start with a small number
+  // of spins and increase number with each turn at spinning until
+  // the count of hard spins exceeds WorkStealingSpinToYieldRatio.
+  // Then do a yield() call and start spinning afresh.
+  if (WorkStealingSpinToYieldRatio > 0) {
+    hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
+    hard_spin_limit = MAX2(hard_spin_limit, 1U);
+  }
+  // Remember the initial spin limit.
+  uint hard_spin_start = hard_spin_limit;
+
+  // Loop waiting for all threads to offer termination or
+  // more work.
   while (true) {
+    // Are all threads offering termination?
     if (_offered_termination == _n_threads) {
-      //inner_termination_loop();
       return true;
     } else {
+      // Look for more work.
+      // Periodically sleep() instead of yield() to give threads
+      // waiting on the cores the chance to grab this code
       if (yield_count <= WorkStealingYieldsBeforeSleep) {
+        // Do a yield or hardspin.  For purposes of deciding whether
+        // to sleep, count this as a yield.
         yield_count++;
-        yield();
+
+        // Periodically call yield() instead of spinning
+        // After WorkStealingSpinToYieldRatio spins, do a yield() call
+        // and reset the counts and starting limit.
+        if (hard_spin_count > WorkStealingSpinToYieldRatio) {
+          yield();
+          hard_spin_count = 0;
+          hard_spin_limit = hard_spin_start;
+#ifdef TRACESPINNING
+          _total_yields++;
+#endif
+        } else {
+          // Hard spin this time
+          // Increase the hard spinning period but only up to a limit.
+          hard_spin_limit = MIN2(2*hard_spin_limit,
+                                 (uint) WorkStealingHardSpins);
+          for (uint j = 0; j < hard_spin_limit; j++) {
+            SpinPause();
+          }
+          hard_spin_count++;
+#ifdef TRACESPINNING
+          _total_spins++;
+#endif
+        }
       } else {
         if (PrintGCDetails && Verbose) {
          gclog_or_tty->print_cr("ParallelTaskTerminator::offer_termination() "
@@ -92,6 +145,9 @@
         sleep(WorkStealingSleepMillis);
       }
 
+#ifdef TRACESPINNING
+      _total_peeks++;
+#endif
       if (peek_in_queue_set() ||
           (terminator != NULL && terminator->should_exit_termination())) {
         Atomic::dec(&_offered_termination);
@@ -101,6 +157,16 @@
   }
 }
 
+#ifdef TRACESPINNING
+void ParallelTaskTerminator::print_termination_counts() {
+  gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %u  "
+    "Total spins: %u  Total peeks: %u",
+    total_yields(),
+    total_spins(),
+    total_peeks());
+}
+#endif
+
 void ParallelTaskTerminator::reset_for_reuse() {
   if (_offered_termination != 0) {
     assert(_offered_termination == _n_threads,
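
The offer_termination() changes above implement a three-level backoff: a bounded hard spin whose length doubles each round, an occasional yield() once WorkStealingSpinToYieldRatio spin rounds have been done, and finally sleep() after WorkStealingYieldsBeforeSleep yields. The standalone sketch below restates that pattern outside HotSpot; the k* constants and work_available() are stand-ins for the WorkStealing* flags and peek_in_queue_set(), not the real names.

#include <algorithm>
#include <atomic>
#include <chrono>
#include <thread>

// Stand-ins for the WorkStealing* flags referenced above (values illustrative).
static const unsigned kHardSpins         = 4096;
static const unsigned kSpinToYieldRatio  = 5;
static const unsigned kYieldsBeforeSleep = 5000;
static const unsigned kSleepMillis       = 1;

// Sketch of the spin/yield/sleep backoff; work_available() stands in for
// peek_in_queue_set().  Returns true if all n_threads agreed to terminate.
bool offer_termination_sketch(std::atomic<int>& offered, int n_threads,
                              bool (*work_available)()) {
  offered.fetch_add(1);
  unsigned yield_count = 0;
  unsigned hard_spin_count = 0;
  // Start with a small spin budget; it doubles on each hard-spin round.
  const unsigned hard_spin_start = std::max(kHardSpins >> kSpinToYieldRatio, 1u);
  unsigned hard_spin_limit = hard_spin_start;

  while (true) {
    if (offered.load() == n_threads) {
      return true;                                  // everyone is done
    }
    if (yield_count <= kYieldsBeforeSleep) {
      yield_count++;
      if (hard_spin_count > kSpinToYieldRatio) {
        std::this_thread::yield();                  // give up the core briefly
        hard_spin_count = 0;
        hard_spin_limit = hard_spin_start;          // restart the ramp-up
      } else {
        hard_spin_limit = std::min(2 * hard_spin_limit, kHardSpins);
        for (volatile unsigned j = 0; j < hard_spin_limit; j++) {
          // busy-wait; SpinPause() plays this role in HotSpot
        }
        hard_spin_count++;
      }
    } else {
      std::this_thread::sleep_for(std::chrono::milliseconds(kSleepMillis));
    }
    if (work_available()) {
      offered.fetch_sub(1);                         // retract the offer
      return false;                                 // go steal more work
    }
  }
}
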
--- a/src/share/vm/utilities/taskqueue.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/utilities/taskqueue.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -22,67 +22,76 @@
  *
  */
 
+#ifdef LP64
+typedef juint TAG_TYPE;
+// for a taskqueue size of 4M
+#define LOG_TASKQ_SIZE 22
+#else
+typedef jushort TAG_TYPE;
+// for a taskqueue size of 16K
+#define LOG_TASKQ_SIZE 14
+#endif
+
 class TaskQueueSuper: public CHeapObj {
 protected:
   // The first free element after the last one pushed (mod _n).
-  // (For now we'll assume only 32-bit CAS).
-  volatile juint _bottom;
+  volatile uint _bottom;
 
   // log2 of the size of the queue.
   enum SomeProtectedConstants {
-    Log_n = 14
+    Log_n = LOG_TASKQ_SIZE
   };
+#undef LOG_TASKQ_SIZE
 
   // Size of the queue.
-  juint n() { return (1 << Log_n); }
+  uint n() { return (1 << Log_n); }
   // For computing "x mod n" efficiently.
-  juint n_mod_mask() { return n() - 1; }
+  uint n_mod_mask() { return n() - 1; }
 
   struct Age {
-    jushort _top;
-    jushort _tag;
+    TAG_TYPE _top;
+    TAG_TYPE _tag;
 
-    jushort tag() const { return _tag; }
-    jushort top() const { return _top; }
+    TAG_TYPE tag() const { return _tag; }
+    TAG_TYPE top() const { return _top; }
 
     Age() { _tag = 0; _top = 0; }
 
     friend bool operator ==(const Age& a1, const Age& a2) {
       return a1.tag() == a2.tag() && a1.top() == a2.top();
     }
-
   };
   Age _age;
   // These make sure we do single atomic reads and writes.
   Age get_age() {
-    jint res = *(volatile jint*)(&_age);
+    uint res = *(volatile uint*)(&_age);
     return *(Age*)(&res);
   }
   void set_age(Age a) {
-    *(volatile jint*)(&_age) = *(int*)(&a);
+    *(volatile uint*)(&_age) = *(uint*)(&a);
   }
 
-  jushort get_top() {
+  TAG_TYPE get_top() {
     return get_age().top();
   }
 
   // These both operate mod _n.
-  juint increment_index(juint ind) {
+  uint increment_index(uint ind) {
     return (ind + 1) & n_mod_mask();
   }
-  juint decrement_index(juint ind) {
+  uint decrement_index(uint ind) {
     return (ind - 1) & n_mod_mask();
   }
 
   // Returns a number in the range [0.._n).  If the result is "n-1", it
   // should be interpreted as 0.
-  juint dirty_size(juint bot, juint top) {
-    return ((jint)bot - (jint)top) & n_mod_mask();
+  uint dirty_size(uint bot, uint top) {
+    return ((int)bot - (int)top) & n_mod_mask();
   }
 
   // Returns the size corresponding to the given "bot" and "top".
-  juint size(juint bot, juint top) {
-    juint sz = dirty_size(bot, top);
+  uint size(uint bot, uint top) {
+    uint sz = dirty_size(bot, top);
     // Has the queue "wrapped", so that bottom is less than top?
     // There's a complicated special case here.  A pair of threads could
     // perform pop_local and pop_global operations concurrently, starting
@@ -94,7 +103,7 @@
     // owner performs pop_local's, and several concurrent threads
     // attempting to perform the pop_global will all perform the same CAS,
     // and only one can succeed.  Any stealing thread that reads after
-    // either the increment or decrement will seen an empty queue, and will
+    // either the increment or decrement will see an empty queue, and will
     // not join the competitors.  The "sz == -1 || sz == _n-1" state will
     // not be modified  by concurrent queues, so the owner thread can reset
     // the state to _bottom == top so subsequent pushes will be performed
@@ -112,11 +121,11 @@
   // Return an estimate of the number of elements in the queue.
   // The "careful" version admits the possibility of pop_local/pop_global
   // races.
-  juint size() {
+  uint size() {
     return size(_bottom, get_top());
   }
 
-  juint dirty_size() {
+  uint dirty_size() {
     return dirty_size(_bottom, get_top());
   }
 
@@ -127,15 +136,15 @@
 
   // Maximum number of elements allowed in the queue.  This is two less
   // than the actual queue size, for somewhat complicated reasons.
-  juint max_elems() { return n() - 2; }
+  uint max_elems() { return n() - 2; }
 
 };
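
The Age struct above exists so that the steal index (_top) and an ABA tag can be read, written, and compare-and-swapped as a single machine word; that is what the volatile casts in get_age()/set_age() and the Atomic::cmpxchg calls further down rely on. The self-contained sketch below shows the same packing with fixed-width types instead of the juint/jushort typedefs, for illustration only.

#include <atomic>
#include <cstdint>
#include <cstring>

// Illustrative only: pack the "top" index and an ABA tag into one 32-bit
// word (the 32-bit configuration above uses two 16-bit fields the same way).
struct Age32 {
  uint16_t _top;   // next element to steal, mod the queue size
  uint16_t _tag;   // bumped whenever _top wraps to 0, to defeat ABA

  uint32_t as_word() const {
    uint32_t w;
    std::memcpy(&w, this, sizeof(w));   // well-defined type pun
    return w;
  }
  static Age32 from_word(uint32_t w) {
    Age32 a;
    std::memcpy(&a, &w, sizeof(a));
    return a;
  }
};

// Because both fields live in one word, a single CAS claims an element and
// advances the tag atomically; no thief can observe a half-updated Age.
inline bool try_install(std::atomic<uint32_t>& age_word,
                        Age32 expected, Age32 desired) {
  uint32_t e = expected.as_word();
  return age_word.compare_exchange_strong(e, desired.as_word());
}
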
 
 template<class E> class GenericTaskQueue: public TaskQueueSuper {
 private:
   // Slow paths for push, pop_local.  (pop_global has no fast path.)
-  bool push_slow(E t, juint dirty_n_elems);
-  bool pop_local_slow(juint localBot, Age oldAge);
+  bool push_slow(E t, uint dirty_n_elems);
+  bool pop_local_slow(uint localBot, Age oldAge);
 
 public:
   // Initializes the queue to empty.
@@ -170,7 +179,7 @@
 
 template<class E>
 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
-  assert(sizeof(Age) == sizeof(jint), "Depends on this.");
+  assert(sizeof(Age) == sizeof(int), "Depends on this.");
 }
 
 template<class E>
@@ -182,9 +191,9 @@
 template<class E>
 void GenericTaskQueue<E>::oops_do(OopClosure* f) {
   // tty->print_cr("START OopTaskQueue::oops_do");
-  int iters = size();
-  juint index = _bottom;
-  for (int i = 0; i < iters; ++i) {
+  uint iters = size();
+  uint index = _bottom;
+  for (uint i = 0; i < iters; ++i) {
     index = decrement_index(index);
     // tty->print_cr("  doing entry %d," INTPTR_T " -> " INTPTR_T,
     //            index, &_elems[index], _elems[index]);
@@ -198,10 +207,10 @@
 
 
 template<class E>
-bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) {
+bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
   if (dirty_n_elems == n() - 1) {
     // Actually means 0, so do the push.
-    juint localBot = _bottom;
+    uint localBot = _bottom;
     _elems[localBot] = t;
     _bottom = increment_index(localBot);
     return true;
@@ -211,7 +220,7 @@
 
 template<class E>
 bool GenericTaskQueue<E>::
-pop_local_slow(juint localBot, Age oldAge) {
+pop_local_slow(uint localBot, Age oldAge) {
   // This queue was observed to contain exactly one element; either this
   // thread will claim it, or a competing "pop_global".  In either case,
   // the queue will be logically empty afterwards.  Create a new Age value
@@ -230,9 +239,8 @@
     Age tempAge;
     // No competing pop_global has yet incremented "top"; we'll try to
     // install new_age, thus claiming the element.
-    assert(sizeof(Age) == sizeof(jint) && sizeof(jint) == sizeof(juint),
-           "Assumption about CAS unit.");
-    *(jint*)&tempAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
+    assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit.");
+    *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
     if (tempAge == oldAge) {
       // We win.
       assert(dirty_size(localBot, get_top()) != n() - 1,
@@ -253,8 +261,8 @@
 bool GenericTaskQueue<E>::pop_global(E& t) {
   Age newAge;
   Age oldAge = get_age();
-  juint localBot = _bottom;
-  juint n_elems = size(localBot, oldAge.top());
+  uint localBot = _bottom;
+  uint n_elems = size(localBot, oldAge.top());
   if (n_elems == 0) {
     return false;
   }
@@ -263,7 +271,7 @@
   newAge._top = increment_index(newAge.top());
   if ( newAge._top == 0 ) newAge._tag++;
   Age resAge;
-  *(jint*)&resAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge);
+  *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
   // Note that using "_bottom" here might fail, since a pop_local might
   // have decremented it.
   assert(dirty_size(localBot, newAge._top) != n() - 1,
@@ -287,7 +295,7 @@
 
 template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper {
 private:
-  int _n;
+  uint _n;
   GenericTaskQueue<E>** _queues;
 
 public:
@@ -300,51 +308,51 @@
     }
   }
 
-  bool steal_1_random(int queue_num, int* seed, E& t);
-  bool steal_best_of_2(int queue_num, int* seed, E& t);
-  bool steal_best_of_all(int queue_num, int* seed, E& t);
+  bool steal_1_random(uint queue_num, int* seed, E& t);
+  bool steal_best_of_2(uint queue_num, int* seed, E& t);
+  bool steal_best_of_all(uint queue_num, int* seed, E& t);
 
-  void register_queue(int i, GenericTaskQueue<E>* q);
+  void register_queue(uint i, GenericTaskQueue<E>* q);
 
-  GenericTaskQueue<E>* queue(int n);
+  GenericTaskQueue<E>* queue(uint n);
 
   // The thread with queue number "queue_num" (and whose random number seed
   // is at "seed") is trying to steal a task from some other queue.  (It
   // may try several queues, according to some configuration parameter.)
   // If some steal succeeds, returns "true" and sets "t" the stolen task,
   // otherwise returns false.
-  bool steal(int queue_num, int* seed, E& t);
+  bool steal(uint queue_num, int* seed, E& t);
 
   bool peek();
 };
 
 template<class E>
-void GenericTaskQueueSet<E>::register_queue(int i, GenericTaskQueue<E>* q) {
-  assert(0 <= i && i < _n, "index out of range.");
+void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) {
+  assert(i < _n, "index out of range.");
   _queues[i] = q;
 }
 
 template<class E>
-GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(int i) {
+GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) {
   return _queues[i];
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal(int queue_num, int* seed, E& t) {
-  for (int i = 0; i < 2 * _n; i++)
+bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) {
+  for (uint i = 0; i < 2 * _n; i++)
     if (steal_best_of_2(queue_num, seed, t))
       return true;
   return false;
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal_best_of_all(int queue_num, int* seed, E& t) {
+bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     int best_k;
-    jint best_sz = 0;
-    for (int k = 0; k < _n; k++) {
+    uint best_sz = 0;
+    for (uint k = 0; k < _n; k++) {
       if (k == queue_num) continue;
-      jint sz = _queues[k]->size();
+      uint sz = _queues[k]->size();
       if (sz > best_sz) {
         best_sz = sz;
         best_k = k;
@@ -362,9 +370,9 @@
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal_1_random(int queue_num, int* seed, E& t) {
+bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
-    int k = queue_num;
+    uint k = queue_num;
     while (k == queue_num) k = randomParkAndMiller(seed) % _n;
     return _queues[k]->pop_global(t);
   } else if (_n == 2) {
@@ -378,20 +386,20 @@
 }
 
 template<class E>
-bool GenericTaskQueueSet<E>::steal_best_of_2(int queue_num, int* seed, E& t) {
+bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
-    int k1 = queue_num;
+    uint k1 = queue_num;
     while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n;
-    int k2 = queue_num;
+    uint k2 = queue_num;
     while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n;
     // Sample both and try the larger.
-    juint sz1 = _queues[k1]->size();
-    juint sz2 = _queues[k2]->size();
+    uint sz1 = _queues[k1]->size();
+    uint sz2 = _queues[k2]->size();
     if (sz2 > sz1) return _queues[k2]->pop_global(t);
     else return _queues[k1]->pop_global(t);
   } else if (_n == 2) {
     // Just try the other one.
-    int k = (queue_num + 1) % 2;
+    uint k = (queue_num + 1) % 2;
     return _queues[k]->pop_global(t);
   } else {
     assert(_n == 1, "can't be zero.");
@@ -402,7 +410,7 @@
 template<class E>
 bool GenericTaskQueueSet<E>::peek() {
   // Try all the queues.
-  for (int j = 0; j < _n; j++) {
+  for (uint j = 0; j < _n; j++) {
     if (_queues[j]->peek())
       return true;
   }
@@ -418,11 +426,19 @@
 // A class to aid in the termination of a set of parallel tasks using
 // TaskQueueSet's for work stealing.
 
+#undef TRACESPINNING
+
 class ParallelTaskTerminator: public StackObj {
 private:
   int _n_threads;
   TaskQueueSetSuper* _queue_set;
-  jint _offered_termination;
+  int _offered_termination;
+
+#ifdef TRACESPINNING
+  static uint _total_yields;
+  static uint _total_spins;
+  static uint _total_peeks;
+#endif
 
   bool peek_in_queue_set();
 protected:
@@ -454,13 +470,19 @@
   // the terminator is finished.
   void reset_for_reuse();
 
+#ifdef TRACESPINNING
+  static uint total_yields() { return _total_yields; }
+  static uint total_spins() { return _total_spins; }
+  static uint total_peeks() { return _total_peeks; }
+  static void print_termination_counts();
+#endif
 };
 
 #define SIMPLE_STACK 0
 
 template<class E> inline bool GenericTaskQueue<E>::push(E t) {
 #if SIMPLE_STACK
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   if (_bottom < max_elems()) {
     _elems[localBot] = t;
     _bottom = localBot + 1;
@@ -469,10 +491,10 @@
     return false;
   }
 #else
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   assert((localBot >= 0) && (localBot < n()), "_bottom out of range.");
-  jushort top = get_top();
-  juint dirty_n_elems = dirty_size(localBot, top);
+  TAG_TYPE top = get_top();
+  uint dirty_n_elems = dirty_size(localBot, top);
   assert((dirty_n_elems >= 0) && (dirty_n_elems < n()),
          "n_elems out of range.");
   if (dirty_n_elems < max_elems()) {
@@ -487,19 +509,19 @@
 
 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
 #if SIMPLE_STACK
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   assert(localBot > 0, "precondition.");
   localBot--;
   t = _elems[localBot];
   _bottom = localBot;
   return true;
 #else
-  juint localBot = _bottom;
+  uint localBot = _bottom;
   // This value cannot be n-1.  That can only occur as a result of
   // the assignment to bottom in this method.  If it does, this method
   // resets the size() to 0 before the next call (which is sequential,
   // since this is pop_local.)
-  juint dirty_n_elems = dirty_size(localBot, get_top());
+  uint dirty_n_elems = dirty_size(localBot, get_top());
   assert(dirty_n_elems != n() - 1, "Shouldn't be possible...");
   if (dirty_n_elems == 0) return false;
   localBot = decrement_index(localBot);
@@ -512,7 +534,7 @@
   // If there's still at least one element in the queue, based on the
   // "_bottom" and "age" we've read, then there can be no interference with
   // a "pop_global" operation, and we're done.
-  juint tp = get_top();
+  TAG_TYPE tp = get_top();    // XXX
   if (size(localBot, tp) > 0) {
     assert(dirty_size(localBot, tp) != n() - 1,
            "Shouldn't be possible...");
@@ -581,7 +603,7 @@
   bool is_empty();
   bool stealable_is_empty();
   bool overflow_is_empty();
-  juint stealable_size() { return _region_queue.size(); }
+  uint stealable_size() { return _region_queue.size(); }
   RegionTaskQueue* task_queue() { return &_region_queue; }
 };
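
steal_best_of_2() above is a "power of two choices" victim selection: pick two victim queues at random, sample their sizes, and steal from the apparently fuller one (falling back to the only other queue when there are just two). The sketch below restates that policy over ordinary std::deque-backed queues; it ignores the lock-free pop_global() synchronization and uses a standard RNG instead of randomParkAndMiller(), so it only illustrates the selection logic.

#include <cstddef>
#include <deque>
#include <random>
#include <vector>

// Illustrative only: "best of two" victim selection for work stealing.
// Thread-safety of the underlying queues is deliberately ignored here.
template <class E>
bool steal_best_of_2_sketch(std::vector<std::deque<E> >& queues,
                            size_t thief, std::mt19937& rng, E& out) {
  const size_t n = queues.size();
  size_t victim;
  if (n > 2) {
    std::uniform_int_distribution<size_t> pick(0, n - 1);
    size_t k1 = thief;
    while (k1 == thief) k1 = pick(rng);
    size_t k2 = thief;
    while (k2 == thief || k2 == k1) k2 = pick(rng);
    // Sample both sizes and steal from the apparently fuller queue.
    victim = (queues[k2].size() > queues[k1].size()) ? k2 : k1;
  } else if (n == 2) {
    victim = (thief + 1) % 2;        // just try the other queue
  } else {
    return false;                    // a lone queue has nobody to steal from
  }
  if (queues[victim].empty()) return false;
  out = queues[victim].front();      // take from the victim's steal end
  queues[victim].pop_front();
  return true;
}
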
 
--- a/src/share/vm/utilities/workgroup.hpp	Mon Feb 09 13:47:26 2009 -0800
+++ b/src/share/vm/utilities/workgroup.hpp	Sun Feb 15 20:09:02 2009 -0800
@@ -32,7 +32,7 @@
 
 // An abstract task to be worked on by a gang.
 // You subclass this to supply your own work() method
-class AbstractGangTask: public CHeapObj {
+class AbstractGangTask VALUE_OBJ_CLASS_SPEC {
 public:
   // The abstract work method.
   // The argument tells you which member of the gang you are.
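
A gang task like the one declared above is used by subclassing it and overriding work(); with the switch from CHeapObj to VALUE_OBJ_CLASS_SPEC, instances become plain value objects that can live on the caller's stack for the duration of the parallel operation. The sketch below is schematic: the AbstractGangTask(const char*) constructor and WorkGang::run_task() call are assumptions based on the usual HotSpot workgroup API, not something shown in this diff.

// Schematic usage only; constructor and run_task() signatures are assumed.
class CountItemsTask : public AbstractGangTask {
  int* _per_worker_counts;              // one slot per gang worker
public:
  CountItemsTask(int* counts)
    : AbstractGangTask("CountItemsTask"), _per_worker_counts(counts) {}

  // "i" is which member of the gang is running this invocation.
  virtual void work(int i) {
    _per_worker_counts[i] += 1;         // each worker does its share here
  }
};

void count_with_gang(WorkGang* gang, int* counts) {
  // A stack-allocated value object now that AbstractGangTask is no longer
  // a CHeapObj: constructed, run by the gang, and destroyed in one scope.
  CountItemsTask task(counts);
  gang->run_task(&task);                // assumed WorkGang entry point
}
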