Mercurial > hg > graal-jvmci-8
comparison src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @ 534:5cfd8d19e546
6786503: Overflow list performance can be improved
Summary: Avoid overflow list walk in CMS & ParNew when it is unnecessary. Fix a couple of correctness issues, including a C-heap leak, in ParNew at the intersection of promotion failure, work queue overflow and object array chunking. Add stress testing option and related assertion checking.
Reviewed-by: jmasa
author | ysr |
---|---|
date | Mon, 26 Jan 2009 12:47:21 -0800 |
parents | 0af8b0718fc9 |
children | 0fbdb4381b99 98cb887364d3 |
comparison
equal
deleted
inserted
replaced
527:2b1de1db9a9d | 534:5cfd8d19e546 |
---|---|
8506 assert(stack->isEmpty(), "Expected precondition"); | 8506 assert(stack->isEmpty(), "Expected precondition"); |
8507 assert(stack->capacity() > num, "Shouldn't bite more than can chew"); | 8507 assert(stack->capacity() > num, "Shouldn't bite more than can chew"); |
8508 size_t i = num; | 8508 size_t i = num; |
8509 oop cur = _overflow_list; | 8509 oop cur = _overflow_list; |
8510 const markOop proto = markOopDesc::prototype(); | 8510 const markOop proto = markOopDesc::prototype(); |
8511 NOT_PRODUCT(size_t n = 0;) | 8511 NOT_PRODUCT(ssize_t n = 0;) |
8512 for (oop next; i > 0 && cur != NULL; cur = next, i--) { | 8512 for (oop next; i > 0 && cur != NULL; cur = next, i--) { |
8513 next = oop(cur->mark()); | 8513 next = oop(cur->mark()); |
8514 cur->set_mark(proto); // until proven otherwise | 8514 cur->set_mark(proto); // until proven otherwise |
8515 assert(cur->is_oop(), "Should be an oop"); | 8515 assert(cur->is_oop(), "Should be an oop"); |
8516 bool res = stack->push(cur); | 8516 bool res = stack->push(cur); |
8523 _num_par_pushes -=n; | 8523 _num_par_pushes -=n; |
8524 #endif | 8524 #endif |
8525 return !stack->isEmpty(); | 8525 return !stack->isEmpty(); |
8526 } | 8526 } |
8527 | 8527 |
8528 // Multi-threaded; use CAS to break off a prefix | 8528 #define BUSY (oop(0x1aff1aff)) |
8529 // (MT-safe) Get a prefix of at most "num" from the list. | |
8530 // The overflow list is chained through the mark word of | |
8531 // each object in the list. We fetch the entire list, | |
8532 // break off a prefix of the right size and return the | |
8533 // remainder. If other threads try to take objects from | |
8534 // the overflow list at that time, they will wait for | |
8535 // some time to see if data becomes available. If (and | |
8536 // only if) another thread places one or more object(s) | |
8537 // on the global list before we have returned the suffix | |
8538 // to the global list, we will walk down our local list | |
8539 // to find its end and append the global list to | |
8540 // our suffix before returning it. This suffix walk can | |
8541 // prove to be expensive (quadratic in the amount of traffic) | |
8542 // when there are many objects in the overflow list and | |
8543 // there is much producer-consumer contention on the list. | |
8544 // *NOTE*: The overflow list manipulation code here and | |
8545 // in ParNewGeneration:: are very similar in shape, | |
8546 // except that in the ParNew case we use the old (from/eden) | |
8547 // copy of the object to thread the list via its klass word. | |
8548 // Because of the common code, if you make any changes in | |
8549 // the code below, please check the ParNew version to see if | |
8550 // similar changes might be needed. | |
8551 // CR 6797058 has been filed to consolidate the common code. | |
8529 bool CMSCollector::par_take_from_overflow_list(size_t num, | 8552 bool CMSCollector::par_take_from_overflow_list(size_t num, |
8530 OopTaskQueue* work_q) { | 8553 OopTaskQueue* work_q) { |
8531 assert(work_q->size() == 0, "That's the current policy"); | 8554 assert(work_q->size() == 0, "First empty local work queue"); |
8532 assert(num < work_q->max_elems(), "Can't bite more than we can chew"); | 8555 assert(num < work_q->max_elems(), "Can't bite more than we can chew"); |
8533 if (_overflow_list == NULL) { | 8556 if (_overflow_list == NULL) { |
8534 return false; | 8557 return false; |
8535 } | 8558 } |
8536 // Grab the entire list; we'll put back a suffix | 8559 // Grab the entire list; we'll put back a suffix |
8537 oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list); | 8560 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); |
8538 if (prefix == NULL) { // someone grabbed it before we did ... | 8561 Thread* tid = Thread::current(); |
8539 // ... we could spin for a short while, but for now we don't | 8562 size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads; |
8540 return false; | 8563 size_t sleep_time_millis = MAX2((size_t)1, num/100); |
8541 } | 8564 // If the list is busy, we spin for a short while, |
8565 // sleeping between attempts to get the list. | |
8566 for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) { | |
8567 os::sleep(tid, sleep_time_millis, false); | |
8568 if (_overflow_list == NULL) { | |
8569 // Nothing left to take | |
8570 return false; | |
8571 } else if (_overflow_list != BUSY) { | |
8572 // Try and grab the prefix | |
8573 prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); | |
8574 } | |
8575 } | |
8576 // If the list was found to be empty, or we spun long | |
8577 // enough, we give up and return empty-handed. If we leave | |
8578 // the list in the BUSY state below, it must be the case that | |
8579 // some other thread holds the overflow list and will set it | |
8580 // to a non-BUSY state in the future. | |
8581 if (prefix == NULL || prefix == BUSY) { | |
8582 // Nothing to take or waited long enough | |
8583 if (prefix == NULL) { | |
8584 // Write back the NULL in case we overwrote it with BUSY above | |
8585 // and it is still the same value. | |
8586 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); | |
8587 } | |
8588 return false; | |
8589 } | |
8590 assert(prefix != NULL && prefix != BUSY, "Error"); | |
8542 size_t i = num; | 8591 size_t i = num; |
8543 oop cur = prefix; | 8592 oop cur = prefix; |
8593 // Walk down the first "num" objects, unless we reach the end. | |
8544 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--); | 8594 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--); |
8545 if (cur->mark() != NULL) { | 8595 if (cur->mark() == NULL) { |
8596 // We have "num" or fewer elements in the list, so there | |
8597 // is nothing to return to the global list. | |
8598 // Write back the NULL in lieu of the BUSY we wrote | |
8599 // above, if it is still the same value. | |
8600 if (_overflow_list == BUSY) { | |
8601 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); | |
8602 } | |
8603 } else { | |
8604 // Chop off the suffix and return it to the global list. | 
8605 assert(cur->mark() != BUSY, "Error"); | |
8546 oop suffix_head = cur->mark(); // suffix will be put back on global list | 8606 oop suffix_head = cur->mark(); // suffix will be put back on global list |
8547 cur->set_mark(NULL); // break off suffix | 8607 cur->set_mark(NULL); // break off suffix |
8548 // Find tail of suffix so we can prepend suffix to global list | 8608 // It's possible that the list is still in the empty(busy) state |
8549 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); | 8609 // we left it in a short while ago; in that case we may be |
8550 oop suffix_tail = cur; | 8610 // able to place back the suffix without incurring the cost |
8551 assert(suffix_tail != NULL && suffix_tail->mark() == NULL, | 8611 // of a walk down the list. |
8552 "Tautology"); | |
8553 oop observed_overflow_list = _overflow_list; | 8612 oop observed_overflow_list = _overflow_list; |
8554 do { | 8613 oop cur_overflow_list = observed_overflow_list; |
8555 cur = observed_overflow_list; | 8614 bool attached = false; |
8556 suffix_tail->set_mark(markOop(cur)); | 8615 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) { |
8557 observed_overflow_list = | 8616 observed_overflow_list = |
8558 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur); | 8617 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); |
8559 } while (cur != observed_overflow_list); | 8618 if (cur_overflow_list == observed_overflow_list) { |
8619 attached = true; | |
8620 break; | |
8621 } else cur_overflow_list = observed_overflow_list; | |
8622 } | |
8623 if (!attached) { | |
8624 // Too bad, someone else sneaked in (at least) an element; we'll need | |
8625 // to do a splice. Find tail of suffix so we can prepend suffix to global | |
8626 // list. | |
8627 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); | |
8628 oop suffix_tail = cur; | |
8629 assert(suffix_tail != NULL && suffix_tail->mark() == NULL, | |
8630 "Tautology"); | |
8631 observed_overflow_list = _overflow_list; | |
8632 do { | |
8633 cur_overflow_list = observed_overflow_list; | |
8634 if (cur_overflow_list != BUSY) { | |
8635 // Do the splice ... | |
8636 suffix_tail->set_mark(markOop(cur_overflow_list)); | |
8637 } else { // cur_overflow_list == BUSY | |
8638 suffix_tail->set_mark(NULL); | |
8639 } | |
8640 // ... and try to place spliced list back on overflow_list ... | |
8641 observed_overflow_list = | |
8642 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); | |
8643 } while (cur_overflow_list != observed_overflow_list); | |
8644 // ... until we have succeeded in doing so. | |
8645 } | |
8560 } | 8646 } |
8561 | 8647 |
8562 // Push the prefix elements on work_q | 8648 // Push the prefix elements on work_q |
8563 assert(prefix != NULL, "control point invariant"); | 8649 assert(prefix != NULL, "control point invariant"); |
8564 const markOop proto = markOopDesc::prototype(); | 8650 const markOop proto = markOopDesc::prototype(); |
8565 oop next; | 8651 oop next; |
8566 NOT_PRODUCT(size_t n = 0;) | 8652 NOT_PRODUCT(ssize_t n = 0;) |
8567 for (cur = prefix; cur != NULL; cur = next) { | 8653 for (cur = prefix; cur != NULL; cur = next) { |
8568 next = oop(cur->mark()); | 8654 next = oop(cur->mark()); |
8569 cur->set_mark(proto); // until proven otherwise | 8655 cur->set_mark(proto); // until proven otherwise |
8570 assert(cur->is_oop(), "Should be an oop"); | 8656 assert(cur->is_oop(), "Should be an oop"); |
8571 bool res = work_q->push(cur); | 8657 bool res = work_q->push(cur); |
8595 par_preserve_mark_if_necessary(p); | 8681 par_preserve_mark_if_necessary(p); |
8596 oop observed_overflow_list = _overflow_list; | 8682 oop observed_overflow_list = _overflow_list; |
8597 oop cur_overflow_list; | 8683 oop cur_overflow_list; |
8598 do { | 8684 do { |
8599 cur_overflow_list = observed_overflow_list; | 8685 cur_overflow_list = observed_overflow_list; |
8600 p->set_mark(markOop(cur_overflow_list)); | 8686 if (cur_overflow_list != BUSY) { |
8687 p->set_mark(markOop(cur_overflow_list)); | |
8688 } else { | |
8689 p->set_mark(NULL); | |
8690 } | |
8601 observed_overflow_list = | 8691 observed_overflow_list = |
8602 (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list); | 8692 (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list); |
8603 } while (cur_overflow_list != observed_overflow_list); | 8693 } while (cur_overflow_list != observed_overflow_list); |
8604 } | 8694 } |
8695 #undef BUSY | |
8605 | 8696 |
8606 // Single threaded | 8697 // Single threaded |
8607 // General Note on GrowableArray: pushes may silently fail | 8698 // General Note on GrowableArray: pushes may silently fail |
8608 // because we are (temporarily) out of C-heap for expanding | 8699 // because we are (temporarily) out of C-heap for expanding |
8609 // the stack. The problem is quite ubiquitous and affects | 8700 // the stack. The problem is quite ubiquitous and affects |
8610 // a lot of code in the JVM. The prudent thing for GrowableArray | 8701 // a lot of code in the JVM. The prudent thing for GrowableArray |
8611 // to do (for now) is to exit with an error. However, that may | 8702 // to do (for now) is to exit with an error. However, that may |
8612 // be too draconian in some cases because the caller may be | 8703 // be too draconian in some cases because the caller may be |
8613 // able to recover without much harm. For suych cases, we | 8704 // able to recover without much harm. For such cases, we |
8614 // should probably introduce a "soft_push" method which returns | 8705 // should probably introduce a "soft_push" method which returns |
8615 // an indication of success or failure with the assumption that | 8706 // an indication of success or failure with the assumption that |
8616 // the caller may be able to recover from a failure; code in | 8707 // the caller may be able to recover from a failure; code in |
8617 // the VM can then be changed, incrementally, to deal with such | 8708 // the VM can then be changed, incrementally, to deal with such |
8618 // failures where possible, thus, incrementally hardening the VM | 8709 // failures where possible, thus, incrementally hardening the VM |
8619 // in such low resource situations. | 8710 // in such low resource situations. |
8620 void CMSCollector::preserve_mark_work(oop p, markOop m) { | 8711 void CMSCollector::preserve_mark_work(oop p, markOop m) { |
8621 int PreserveMarkStackSize = 128; | |
8622 | |
8623 if (_preserved_oop_stack == NULL) { | 8712 if (_preserved_oop_stack == NULL) { |
8624 assert(_preserved_mark_stack == NULL, | 8713 assert(_preserved_mark_stack == NULL, |
8625 "bijection with preserved_oop_stack"); | 8714 "bijection with preserved_oop_stack"); |
8626 // Allocate the stacks | 8715 // Allocate the stacks |
8627 _preserved_oop_stack = new (ResourceObj::C_HEAP) | 8716 _preserved_oop_stack = new (ResourceObj::C_HEAP) |