Mercurial > hg > truffle
diff src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @ 534:5cfd8d19e546
6786503: Overflow list performance can be improved
Summary: Avoid overflow list walk in CMS & ParNew when it is unnecessary. Fix a couple of correctness issues, including a C-heap leak, in ParNew at the intersection of promotion failure, work queue overflow and object array chunking. Add stress testing option and related assertion checking.
Reviewed-by: jmasa
author | ysr |
---|---|
date | Mon, 26 Jan 2009 12:47:21 -0800 |
parents | 0af8b0718fc9 |
children | 0fbdb4381b99 98cb887364d3 |
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jan 21 13:40:10 2009 -0800 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Jan 26 12:47:21 2009 -0800 @@ -8508,7 +8508,7 @@ size_t i = num; oop cur = _overflow_list; const markOop proto = markOopDesc::prototype(); - NOT_PRODUCT(size_t n = 0;) + NOT_PRODUCT(ssize_t n = 0;) for (oop next; i > 0 && cur != NULL; cur = next, i--) { next = oop(cur->mark()); cur->set_mark(proto); // until proven otherwise @@ -8525,45 +8525,131 @@ return !stack->isEmpty(); } -// Multi-threaded; use CAS to break off a prefix +#define BUSY (oop(0x1aff1aff)) +// (MT-safe) Get a prefix of at most "num" from the list. +// The overflow list is chained through the mark word of +// each object in the list. We fetch the entire list, +// break off a prefix of the right size and return the +// remainder. If other threads try to take objects from +// the overflow list at that time, they will wait for +// some time to see if data becomes available. If (and +// only if) another thread places one or more object(s) +// on the global list before we have returned the suffix +// to the global list, we will walk down our local list +// to find its end and append the global list to +// our suffix before returning it. This suffix walk can +// prove to be expensive (quadratic in the amount of traffic) +// when there are many objects in the overflow list and +// there is much producer-consumer contention on the list. +// *NOTE*: The overflow list manipulation code here and +// in ParNewGeneration:: are very similar in shape, +// except that in the ParNew case we use the old (from/eden) +// copy of the object to thread the list via its klass word. +// Because of the common code, if you make any changes in +// the code below, please check the ParNew version to see if +// similar changes might be needed. +// CR 6797058 has been filed to consolidate the common code. bool CMSCollector::par_take_from_overflow_list(size_t num, OopTaskQueue* work_q) { - assert(work_q->size() == 0, "That's the current policy"); + assert(work_q->size() == 0, "First empty local work queue"); assert(num < work_q->max_elems(), "Can't bite more than we can chew"); if (_overflow_list == NULL) { return false; } // Grab the entire list; we'll put back a suffix - oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list); - if (prefix == NULL) { // someone grabbed it before we did ... - // ... we could spin for a short while, but for now we don't - return false; - } + oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); + Thread* tid = Thread::current(); + size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads; + size_t sleep_time_millis = MAX2((size_t)1, num/100); + // If the list is busy, we spin for a short while, + // sleeping between attempts to get the list. + for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) { + os::sleep(tid, sleep_time_millis, false); + if (_overflow_list == NULL) { + // Nothing left to take + return false; + } else if (_overflow_list != BUSY) { + // Try and grab the prefix + prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); + } + } + // If the list was found to be empty, or we spun long + // enough, we give up and return empty-handed. If we leave + // the list in the BUSY state below, it must be the case that + // some other thread holds the overflow list and will set it + // to a non-BUSY state in the future. + if (prefix == NULL || prefix == BUSY) { + // Nothing to take or waited long enough + if (prefix == NULL) { + // Write back the NULL in case we overwrote it with BUSY above + // and it is still the same value. + (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); + } + return false; + } + assert(prefix != NULL && prefix != BUSY, "Error"); size_t i = num; oop cur = prefix; + // Walk down the first "num" objects, unless we reach the end. for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--); - if (cur->mark() != NULL) { + if (cur->mark() == NULL) { + // We have "num" or fewer elements in the list, so there + // is nothing to return to the global list. + // Write back the NULL in lieu of the BUSY we wrote + // above, if it is still the same value. + if (_overflow_list == BUSY) { + (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); + } + } else { + // Chop off the suffix and rerturn it to the global list. + assert(cur->mark() != BUSY, "Error"); oop suffix_head = cur->mark(); // suffix will be put back on global list cur->set_mark(NULL); // break off suffix - // Find tail of suffix so we can prepend suffix to global list - for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); - oop suffix_tail = cur; - assert(suffix_tail != NULL && suffix_tail->mark() == NULL, - "Tautology"); + // It's possible that the list is still in the empty(busy) state + // we left it in a short while ago; in that case we may be + // able to place back the suffix without incurring the cost + // of a walk down the list. oop observed_overflow_list = _overflow_list; - do { - cur = observed_overflow_list; - suffix_tail->set_mark(markOop(cur)); + oop cur_overflow_list = observed_overflow_list; + bool attached = false; + while (observed_overflow_list == BUSY || observed_overflow_list == NULL) { observed_overflow_list = - (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur); - } while (cur != observed_overflow_list); + (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); + if (cur_overflow_list == observed_overflow_list) { + attached = true; + break; + } else cur_overflow_list = observed_overflow_list; + } + if (!attached) { + // Too bad, someone else sneaked in (at least) an element; we'll need + // to do a splice. Find tail of suffix so we can prepend suffix to global + // list. + for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); + oop suffix_tail = cur; + assert(suffix_tail != NULL && suffix_tail->mark() == NULL, + "Tautology"); + observed_overflow_list = _overflow_list; + do { + cur_overflow_list = observed_overflow_list; + if (cur_overflow_list != BUSY) { + // Do the splice ... + suffix_tail->set_mark(markOop(cur_overflow_list)); + } else { // cur_overflow_list == BUSY + suffix_tail->set_mark(NULL); + } + // ... and try to place spliced list back on overflow_list ... + observed_overflow_list = + (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); + } while (cur_overflow_list != observed_overflow_list); + // ... until we have succeeded in doing so. + } } // Push the prefix elements on work_q assert(prefix != NULL, "control point invariant"); const markOop proto = markOopDesc::prototype(); oop next; - NOT_PRODUCT(size_t n = 0;) + NOT_PRODUCT(ssize_t n = 0;) for (cur = prefix; cur != NULL; cur = next) { next = oop(cur->mark()); cur->set_mark(proto); // until proven otherwise @@ -8597,11 +8683,16 @@ oop cur_overflow_list; do { cur_overflow_list = observed_overflow_list; - p->set_mark(markOop(cur_overflow_list)); + if (cur_overflow_list != BUSY) { + p->set_mark(markOop(cur_overflow_list)); + } else { + p->set_mark(NULL); + } observed_overflow_list = (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list); } while (cur_overflow_list != observed_overflow_list); } +#undef BUSY // Single threaded // General Note on GrowableArray: pushes may silently fail @@ -8610,7 +8701,7 @@ // a lot of code in the JVM. The prudent thing for GrowableArray // to do (for now) is to exit with an error. However, that may // be too draconian in some cases because the caller may be -// able to recover without much harm. For suych cases, we +// able to recover without much harm. For such cases, we // should probably introduce a "soft_push" method which returns // an indication of success or failure with the assumption that // the caller may be able to recover from a failure; code in @@ -8618,8 +8709,6 @@ // failures where possible, thus, incrementally hardening the VM // in such low resource situations. void CMSCollector::preserve_mark_work(oop p, markOop m) { - int PreserveMarkStackSize = 128; - if (_preserved_oop_stack == NULL) { assert(_preserved_mark_stack == NULL, "bijection with preserved_oop_stack");