Mercurial > hg > graal-jvmci-8
comparison src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @ 534:5cfd8d19e546
6786503: Overflow list performance can be improved
Summary: Avoid overflow list walk in CMS & ParNew when it is unnecessary. Fix a couple of correctness issues, including a C-heap leak, in ParNew at the intersection of promotion failure, work queue overflow and object array chunking. Add stress testing option and related assertion checking.
Reviewed-by: jmasa
author | ysr |
---|---|
date | Mon, 26 Jan 2009 12:47:21 -0800 |
parents | 0af8b0718fc9 |
children | 0fbdb4381b99 98cb887364d3 |
comparison
equal
deleted
inserted
replaced
527:2b1de1db9a9d | 534:5cfd8d19e546 |
---|---|
8506 assert(stack->isEmpty(), "Expected precondition"); | 8506 assert(stack->isEmpty(), "Expected precondition"); |
8507 assert(stack->capacity() > num, "Shouldn't bite more than can chew"); | 8507 assert(stack->capacity() > num, "Shouldn't bite more than can chew"); |
8508 size_t i = num; | 8508 size_t i = num; |
8509 oop cur = _overflow_list; | 8509 oop cur = _overflow_list; |
8510 const markOop proto = markOopDesc::prototype(); | 8510 const markOop proto = markOopDesc::prototype(); |
8511 NOT_PRODUCT(size_t n = 0;) | 8511 NOT_PRODUCT(ssize_t n = 0;) |
8512 for (oop next; i > 0 && cur != NULL; cur = next, i--) { | 8512 for (oop next; i > 0 && cur != NULL; cur = next, i--) { |
8513 next = oop(cur->mark()); | 8513 next = oop(cur->mark()); |
8514 cur->set_mark(proto); // until proven otherwise | 8514 cur->set_mark(proto); // until proven otherwise |
8515 assert(cur->is_oop(), "Should be an oop"); | 8515 assert(cur->is_oop(), "Should be an oop"); |
8516 bool res = stack->push(cur); | 8516 bool res = stack->push(cur); |
8523 _num_par_pushes -=n; | 8523 _num_par_pushes -=n; |
8524 #endif | 8524 #endif |
8525 return !stack->isEmpty(); | 8525 return !stack->isEmpty(); |
8526 } | 8526 } |
8527 | 8527 |
8528 // Multi-threaded; use CAS to break off a prefix | 8528 #define BUSY (oop(0x1aff1aff)) |
8529 // (MT-safe) Get a prefix of at most "num" from the list. | |
8530 // The overflow list is chained through the mark word of | |
8531 // each object in the list. We fetch the entire list, | |
8532 // break off a prefix of the right size and return the | |
8533 // remainder. If other threads try to take objects from | |
8534 // the overflow list at that time, they will wait for | |
8535 // some time to see if data becomes available. If (and | |
8536 // only if) another thread places one or more object(s) | |
8537 // on the global list before we have returned the suffix | |
8538 // to the global list, we will walk down our local list | |
8539 // to find its end and append the global list to | |
8540 // our suffix before returning it. This suffix walk can | |
8541 // prove to be expensive (quadratic in the amount of traffic) | |
8542 // when there are many objects in the overflow list and | |
8543 // there is much producer-consumer contention on the list. | |
8544 // *NOTE*: The overflow list manipulation code here and | |
8545 // in ParNewGeneration:: are very similar in shape, | |
8546 // except that in the ParNew case we use the old (from/eden) | |
8547 // copy of the object to thread the list via its klass word. | |
8548 // Because of the common code, if you make any changes in | |
8549 // the code below, please check the ParNew version to see if | |
8550 // similar changes might be needed. | |
8551 // CR 6797058 has been filed to consolidate the common code. | |
8529 bool CMSCollector::par_take_from_overflow_list(size_t num, | 8552 bool CMSCollector::par_take_from_overflow_list(size_t num, |
8530 OopTaskQueue* work_q) { | 8553 OopTaskQueue* work_q) { |
8531 assert(work_q->size() == 0, "That's the current policy"); | 8554 assert(work_q->size() == 0, "First empty local work queue"); |
8532 assert(num < work_q->max_elems(), "Can't bite more than we can chew"); | 8555 assert(num < work_q->max_elems(), "Can't bite more than we can chew"); |
8533 if (_overflow_list == NULL) { | 8556 if (_overflow_list == NULL) { |
8534 return false; | 8557 return false; |
8535 } | 8558 } |
8536 // Grab the entire list; we'll put back a suffix | 8559 // Grab the entire list; we'll put back a suffix |
8537 oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list); | 8560 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); |
8538 if (prefix == NULL) { // someone grabbed it before we did ... | 8561 Thread* tid = Thread::current(); |
8539 // ... we could spin for a short while, but for now we don't | 8562 size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads; |
8540 return false; | 8563 size_t sleep_time_millis = MAX2((size_t)1, num/100); |
8541 } | 8564 // If the list is busy, we spin for a short while, |
8565 // sleeping between attempts to get the list. | |
8566 for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) { | |
8567 os::sleep(tid, sleep_time_millis, false); | |
8568 if (_overflow_list == NULL) { | |
8569 // Nothing left to take | |
8570 return false; | |
8571 } else if (_overflow_list != BUSY) { | |
8572 // Try and grab the prefix | |
8573 prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); | |
8574 } | |
8575 } | |
8576 // If the list was found to be empty, or we spun long | |
8577 // enough, we give up and return empty-handed. If we leave | |
8578 // the list in the BUSY state below, it must be the case that | |
8579 // some other thread holds the overflow list and will set it | |
8580 // to a non-BUSY state in the future. | |
8581 if (prefix == NULL || prefix == BUSY) { | |
8582 // Nothing to take or waited long enough | |
8583 if (prefix == NULL) { | |
8584 // Write back the NULL in case we overwrote it with BUSY above | |
8585 // and it is still the same value. | |
8586 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); | |
8587 } | |
8588 return false; | |
8589 } | |
8590 assert(prefix != NULL && prefix != BUSY, "Error"); | |
8542 size_t i = num; | 8591 size_t i = num; |
8543 oop cur = prefix; | 8592 oop cur = prefix; |
8593 // Walk down the first "num" objects, unless we reach the end. | |
8544 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--); | 8594 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--); |
8545 if (cur->mark() != NULL) { | 8595 if (cur->mark() == NULL) { |
8596 // We have "num" or fewer elements in the list, so there | |
8597 // is nothing to return to the global list. | |
8598 // Write back the NULL in lieu of the BUSY we wrote | |
8599 // above, if it is still the same value. | |
8600 if (_overflow_list == BUSY) { | |
8601 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); | |
8602 } | |
8603 } else { | |
8604 // Chop off the suffix and return it to the global list. | 
8605 assert(cur->mark() != BUSY, "Error"); | |
8546 oop suffix_head = cur->mark(); // suffix will be put back on global list | 8606 oop suffix_head = cur->mark(); // suffix will be put back on global list |
8547 cur->set_mark(NULL); // break off suffix | 8607 cur->set_mark(NULL); // break off suffix |
8548 // Find tail of suffix so we can prepend suffix to global list | 8608 // It's possible that the list is still in the empty(busy) state |
8549 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); | 8609 // we left it in a short while ago; in that case we may be |
8550 oop suffix_tail = cur; | 8610 // able to place back the suffix without incurring the cost |
8551 assert(suffix_tail != NULL && suffix_tail->mark() == NULL, | 8611 // of a walk down the list. |
8552 "Tautology"); | |
8553 oop observed_overflow_list = _overflow_list; | 8612 oop observed_overflow_list = _overflow_list; |
8554 do { | 8613 oop cur_overflow_list = observed_overflow_list; |
8555 cur = observed_overflow_list; | 8614 bool attached = false; |
8556 suffix_tail->set_mark(markOop(cur)); | 8615 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) { |
8557 observed_overflow_list = | 8616 observed_overflow_list = |
8558 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur); | 8617 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); |
8559 } while (cur != observed_overflow_list); | 8618 if (cur_overflow_list == observed_overflow_list) { |
8619 attached = true; | |
8620 break; | |
8621 } else cur_overflow_list = observed_overflow_list; | |
8622 } | |
8623 if (!attached) { | |
8624 // Too bad, someone else sneaked in (at least) an element; we'll need | |
8625 // to do a splice. Find tail of suffix so we can prepend suffix to global | |
8626 // list. | |
8627 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); | |
8628 oop suffix_tail = cur; | |
8629 assert(suffix_tail != NULL && suffix_tail->mark() == NULL, | |
8630 "Tautology"); | |
8631 observed_overflow_list = _overflow_list; | |
8632 do { | |
8633 cur_overflow_list = observed_overflow_list; | |
8634 if (cur_overflow_list != BUSY) { | |
8635 // Do the splice ... | |
8636 suffix_tail->set_mark(markOop(cur_overflow_list)); | |
8637 } else { // cur_overflow_list == BUSY | |
8638 suffix_tail->set_mark(NULL); | |
8639 } | |
8640 // ... and try to place spliced list back on overflow_list ... | |
8641 observed_overflow_list = | |
8642 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); | |
8643 } while (cur_overflow_list != observed_overflow_list); | |
8644 // ... until we have succeeded in doing so. | |
8645 } | |
8560 } | 8646 } |
8561 | 8647 |
8562 // Push the prefix elements on work_q | 8648 // Push the prefix elements on work_q |
8563 assert(prefix != NULL, "control point invariant"); | 8649 assert(prefix != NULL, "control point invariant"); |
8564 const markOop proto = markOopDesc::prototype(); | 8650 const markOop proto = markOopDesc::prototype(); |
8565 oop next; | 8651 oop next; |
8566 NOT_PRODUCT(size_t n = 0;) | 8652 NOT_PRODUCT(ssize_t n = 0;) |
8567 for (cur = prefix; cur != NULL; cur = next) { | 8653 for (cur = prefix; cur != NULL; cur = next) { |
8568 next = oop(cur->mark()); | 8654 next = oop(cur->mark()); |
8569 cur->set_mark(proto); // until proven otherwise | 8655 cur->set_mark(proto); // until proven otherwise |
8570 assert(cur->is_oop(), "Should be an oop"); | 8656 assert(cur->is_oop(), "Should be an oop"); |
8571 bool res = work_q->push(cur); | 8657 bool res = work_q->push(cur); |
8595 par_preserve_mark_if_necessary(p); | 8681 par_preserve_mark_if_necessary(p); |
8596 oop observed_overflow_list = _overflow_list; | 8682 oop observed_overflow_list = _overflow_list; |
8597 oop cur_overflow_list; | 8683 oop cur_overflow_list; |
8598 do { | 8684 do { |
8599 cur_overflow_list = observed_overflow_list; | 8685 cur_overflow_list = observed_overflow_list; |
8600 p->set_mark(markOop(cur_overflow_list)); | 8686 if (cur_overflow_list != BUSY) { |
8687 p->set_mark(markOop(cur_overflow_list)); | |
8688 } else { | |
8689 p->set_mark(NULL); | |
8690 } | |
8601 observed_overflow_list = | 8691 observed_overflow_list = |
8602 (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list); | 8692 (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list); |
8603 } while (cur_overflow_list != observed_overflow_list); | 8693 } while (cur_overflow_list != observed_overflow_list); |
8604 } | 8694 } |
8695 #undef BUSY | |
8605 | 8696 |
8606 // Single threaded | 8697 // Single threaded |
8607 // General Note on GrowableArray: pushes may silently fail | 8698 // General Note on GrowableArray: pushes may silently fail |
8608 // because we are (temporarily) out of C-heap for expanding | 8699 // because we are (temporarily) out of C-heap for expanding |
8609 // the stack. The problem is quite ubiquitous and affects | 8700 // the stack. The problem is quite ubiquitous and affects |
8610 // a lot of code in the JVM. The prudent thing for GrowableArray | 8701 // a lot of code in the JVM. The prudent thing for GrowableArray |
8611 // to do (for now) is to exit with an error. However, that may | 8702 // to do (for now) is to exit with an error. However, that may |
8612 // be too draconian in some cases because the caller may be | 8703 // be too draconian in some cases because the caller may be |
8613 // able to recover without much harm. For suych cases, we | 8704 // able to recover without much harm. For such cases, we |
8614 // should probably introduce a "soft_push" method which returns | 8705 // should probably introduce a "soft_push" method which returns |
8615 // an indication of success or failure with the assumption that | 8706 // an indication of success or failure with the assumption that |
8616 // the caller may be able to recover from a failure; code in | 8707 // the caller may be able to recover from a failure; code in |
8617 // the VM can then be changed, incrementally, to deal with such | 8708 // the VM can then be changed, incrementally, to deal with such |
8618 // failures where possible, thus, incrementally hardening the VM | 8709 // failures where possible, thus, incrementally hardening the VM |
8619 // in such low resource situations. | 8710 // in such low resource situations. |
8620 void CMSCollector::preserve_mark_work(oop p, markOop m) { | 8711 void CMSCollector::preserve_mark_work(oop p, markOop m) { |
8621 int PreserveMarkStackSize = 128; | |
8622 | |
8623 if (_preserved_oop_stack == NULL) { | 8712 if (_preserved_oop_stack == NULL) { |
8624 assert(_preserved_mark_stack == NULL, | 8713 assert(_preserved_mark_stack == NULL, |
8625 "bijection with preserved_oop_stack"); | 8714 "bijection with preserved_oop_stack"); |
8626 // Allocate the stacks | 8715 // Allocate the stacks |
8627 _preserved_oop_stack = new (ResourceObj::C_HEAP) | 8716 _preserved_oop_stack = new (ResourceObj::C_HEAP) |