Mercurial > hg > graal-jvmci-8
changeset 590:3264b1424f72
Merge
author | apangin |
---|---|
date | Sun, 15 Feb 2009 20:09:02 -0800 |
parents | fe3d7c11b4b7 (diff) 01ddca3f0730 (current diff) |
children | a53107650e8b 82e4d969e7cb |
files | |
diffstat | 58 files changed, 1063 insertions(+), 394 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgtags Mon Feb 09 13:47:26 2009 -0800 +++ b/.hgtags Sun Feb 15 20:09:02 2009 -0800 @@ -18,3 +18,5 @@ f9d938ede1960d18cb7cf23c645b026519c1a678 jdk7-b41 ad8c8ca4ab0f4c86e74c061958f44a8f4a930f2c jdk7-b42 fc6a5ae3fef5ebacfa896dbb3ae37715e388e282 jdk7-b43 +809e899c638bd9b21836abf9d09ab2a30ff3900b jdk7-b44 +945bf754069766e76873c53102fae48abf04cf5b jdk7-b45
--- a/src/os/linux/vm/os_linux.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/os/linux/vm/os_linux.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -1432,6 +1432,10 @@ return buf; } +struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { + return localtime_r(clock, res); +} + //////////////////////////////////////////////////////////////////////////////// // runtime exit support
--- a/src/os/solaris/vm/os_solaris.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/os/solaris/vm/os_solaris.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -323,6 +323,10 @@ return (size_t)(base - bottom); } +struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { + return localtime_r(clock, res); +} + // interruptible infrastructure // setup_interruptible saves the thread state before going into an
--- a/src/os/windows/vm/os_windows.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/os/windows/vm/os_windows.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -327,6 +327,14 @@ return sz; } +struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { + const struct tm* time_struct_ptr = localtime(clock); + if (time_struct_ptr != NULL) { + *res = *time_struct_ptr; + return res; + } + return NULL; +} LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo);
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -8508,7 +8508,7 @@ size_t i = num; oop cur = _overflow_list; const markOop proto = markOopDesc::prototype(); - NOT_PRODUCT(size_t n = 0;) + NOT_PRODUCT(ssize_t n = 0;) for (oop next; i > 0 && cur != NULL; cur = next, i--) { next = oop(cur->mark()); cur->set_mark(proto); // until proven otherwise @@ -8525,45 +8525,131 @@ return !stack->isEmpty(); } -// Multi-threaded; use CAS to break off a prefix +#define BUSY (oop(0x1aff1aff)) +// (MT-safe) Get a prefix of at most "num" from the list. +// The overflow list is chained through the mark word of +// each object in the list. We fetch the entire list, +// break off a prefix of the right size and return the +// remainder. If other threads try to take objects from +// the overflow list at that time, they will wait for +// some time to see if data becomes available. If (and +// only if) another thread places one or more object(s) +// on the global list before we have returned the suffix +// to the global list, we will walk down our local list +// to find its end and append the global list to +// our suffix before returning it. This suffix walk can +// prove to be expensive (quadratic in the amount of traffic) +// when there are many objects in the overflow list and +// there is much producer-consumer contention on the list. +// *NOTE*: The overflow list manipulation code here and +// in ParNewGeneration:: are very similar in shape, +// except that in the ParNew case we use the old (from/eden) +// copy of the object to thread the list via its klass word. +// Because of the common code, if you make any changes in +// the code below, please check the ParNew version to see if +// similar changes might be needed. +// CR 6797058 has been filed to consolidate the common code. bool CMSCollector::par_take_from_overflow_list(size_t num, OopTaskQueue* work_q) { - assert(work_q->size() == 0, "That's the current policy"); + assert(work_q->size() == 0, "First empty local work queue"); assert(num < work_q->max_elems(), "Can't bite more than we can chew"); if (_overflow_list == NULL) { return false; } // Grab the entire list; we'll put back a suffix - oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list); - if (prefix == NULL) { // someone grabbed it before we did ... - // ... we could spin for a short while, but for now we don't - return false; - } + oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); + Thread* tid = Thread::current(); + size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads; + size_t sleep_time_millis = MAX2((size_t)1, num/100); + // If the list is busy, we spin for a short while, + // sleeping between attempts to get the list. + for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) { + os::sleep(tid, sleep_time_millis, false); + if (_overflow_list == NULL) { + // Nothing left to take + return false; + } else if (_overflow_list != BUSY) { + // Try and grab the prefix + prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); + } + } + // If the list was found to be empty, or we spun long + // enough, we give up and return empty-handed. If we leave + // the list in the BUSY state below, it must be the case that + // some other thread holds the overflow list and will set it + // to a non-BUSY state in the future. + if (prefix == NULL || prefix == BUSY) { + // Nothing to take or waited long enough + if (prefix == NULL) { + // Write back the NULL in case we overwrote it with BUSY above + // and it is still the same value. + (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); + } + return false; + } + assert(prefix != NULL && prefix != BUSY, "Error"); size_t i = num; oop cur = prefix; + // Walk down the first "num" objects, unless we reach the end. for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--); - if (cur->mark() != NULL) { + if (cur->mark() == NULL) { + // We have "num" or fewer elements in the list, so there + // is nothing to return to the global list. + // Write back the NULL in lieu of the BUSY we wrote + // above, if it is still the same value. + if (_overflow_list == BUSY) { + (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); + } + } else { + // Chop off the suffix and rerturn it to the global list. + assert(cur->mark() != BUSY, "Error"); oop suffix_head = cur->mark(); // suffix will be put back on global list cur->set_mark(NULL); // break off suffix - // Find tail of suffix so we can prepend suffix to global list - for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); - oop suffix_tail = cur; - assert(suffix_tail != NULL && suffix_tail->mark() == NULL, - "Tautology"); + // It's possible that the list is still in the empty(busy) state + // we left it in a short while ago; in that case we may be + // able to place back the suffix without incurring the cost + // of a walk down the list. oop observed_overflow_list = _overflow_list; - do { - cur = observed_overflow_list; - suffix_tail->set_mark(markOop(cur)); + oop cur_overflow_list = observed_overflow_list; + bool attached = false; + while (observed_overflow_list == BUSY || observed_overflow_list == NULL) { observed_overflow_list = - (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur); - } while (cur != observed_overflow_list); + (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); + if (cur_overflow_list == observed_overflow_list) { + attached = true; + break; + } else cur_overflow_list = observed_overflow_list; + } + if (!attached) { + // Too bad, someone else sneaked in (at least) an element; we'll need + // to do a splice. Find tail of suffix so we can prepend suffix to global + // list. + for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark())); + oop suffix_tail = cur; + assert(suffix_tail != NULL && suffix_tail->mark() == NULL, + "Tautology"); + observed_overflow_list = _overflow_list; + do { + cur_overflow_list = observed_overflow_list; + if (cur_overflow_list != BUSY) { + // Do the splice ... + suffix_tail->set_mark(markOop(cur_overflow_list)); + } else { // cur_overflow_list == BUSY + suffix_tail->set_mark(NULL); + } + // ... and try to place spliced list back on overflow_list ... + observed_overflow_list = + (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list); + } while (cur_overflow_list != observed_overflow_list); + // ... until we have succeeded in doing so. + } } // Push the prefix elements on work_q assert(prefix != NULL, "control point invariant"); const markOop proto = markOopDesc::prototype(); oop next; - NOT_PRODUCT(size_t n = 0;) + NOT_PRODUCT(ssize_t n = 0;) for (cur = prefix; cur != NULL; cur = next) { next = oop(cur->mark()); cur->set_mark(proto); // until proven otherwise @@ -8597,11 +8683,16 @@ oop cur_overflow_list; do { cur_overflow_list = observed_overflow_list; - p->set_mark(markOop(cur_overflow_list)); + if (cur_overflow_list != BUSY) { + p->set_mark(markOop(cur_overflow_list)); + } else { + p->set_mark(NULL); + } observed_overflow_list = (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list); } while (cur_overflow_list != observed_overflow_list); } +#undef BUSY // Single threaded // General Note on GrowableArray: pushes may silently fail @@ -8610,7 +8701,7 @@ // a lot of code in the JVM. The prudent thing for GrowableArray // to do (for now) is to exit with an error. However, that may // be too draconian in some cases because the caller may be -// able to recover without much harm. For suych cases, we +// able to recover without much harm. For such cases, we // should probably introduce a "soft_push" method which returns // an indication of success or failure with the assumption that // the caller may be able to recover from a failure; code in @@ -8618,8 +8709,6 @@ // failures where possible, thus, incrementally hardening the VM // in such low resource situations. void CMSCollector::preserve_mark_work(oop p, markOop m) { - int PreserveMarkStackSize = 128; - if (_preserved_oop_stack == NULL) { assert(_preserved_mark_stack == NULL, "bijection with preserved_oop_stack");
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -595,7 +595,7 @@ size_t _ser_kac_preclean_ovflw; size_t _ser_kac_ovflw; size_t _par_kac_ovflw; - NOT_PRODUCT(size_t _num_par_pushes;) + NOT_PRODUCT(ssize_t _num_par_pushes;) // ("Weak") Reference processing support ReferenceProcessor* _ref_processor;
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -24,7 +24,7 @@ // We need to sort heap regions by collection desirability. -class CSetChooserCache { +class CSetChooserCache VALUE_OBJ_CLASS_SPEC { private: enum { CacheLength = 16
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -33,7 +33,7 @@ PYA_cancel // It's been completed by somebody else: cancel. }; -class ConcurrentG1Refine { +class ConcurrentG1Refine: public CHeapObj { ConcurrentG1RefineThread* _cg1rThread; volatile jint _pya;
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -30,7 +30,7 @@ // A generic CM bit map. This is essentially a wrapper around the BitMap // class, with one bit per (1<<_shifter) HeapWords. -class CMBitMapRO { +class CMBitMapRO VALUE_OBJ_CLASS_SPEC { protected: HeapWord* _bmStartWord; // base address of range covered by map size_t _bmWordSize; // map size (in #HeapWords covered) @@ -139,7 +139,7 @@ // Represents a marking stack used by the CM collector. // Ideally this should be GrowableArray<> just like MSC's marking stack(s). -class CMMarkStack { +class CMMarkStack VALUE_OBJ_CLASS_SPEC { ConcurrentMark* _cm; oop* _base; // bottom of stack jint _index; // one more than last occupied index @@ -237,7 +237,7 @@ void oops_do(OopClosure* f); }; -class CMRegionStack { +class CMRegionStack VALUE_OBJ_CLASS_SPEC { MemRegion* _base; jint _capacity; jint _index; @@ -312,7 +312,7 @@ class ConcurrentMarkThread; -class ConcurrentMark { +class ConcurrentMark: public CHeapObj { friend class ConcurrentMarkThread; friend class CMTask; friend class CMBitMapClosure;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -141,7 +141,7 @@ _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL), _length(0), _scan_only_length(0), _last_sampled_rs_lengths(0), - _survivor_head(NULL), _survivors_tail(NULL), _survivor_length(0) + _survivor_head(NULL), _survivor_tail(NULL), _survivor_length(0) { guarantee( check_list_empty(false), "just making sure..." ); } @@ -159,16 +159,15 @@ } void YoungList::add_survivor_region(HeapRegion* hr) { - assert(!hr->is_survivor(), "should not already be for survived"); + assert(hr->is_survivor(), "should be flagged as survivor region"); assert(hr->get_next_young_region() == NULL, "cause it should!"); hr->set_next_young_region(_survivor_head); if (_survivor_head == NULL) { - _survivors_tail = hr; + _survivor_tail = hr; } _survivor_head = hr; - hr->set_survivor(); ++_survivor_length; } @@ -239,7 +238,7 @@ empty_list(_survivor_head); _survivor_head = NULL; - _survivors_tail = NULL; + _survivor_tail = NULL; _survivor_length = 0; _last_sampled_rs_lengths = 0; @@ -391,6 +390,7 @@ // Add survivor regions to SurvRateGroup. _g1h->g1_policy()->note_start_adding_survivor_regions(); + _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */); for (HeapRegion* curr = _survivor_head; curr != NULL; curr = curr->get_next_young_region()) { @@ -401,7 +401,7 @@ if (_survivor_head != NULL) { _head = _survivor_head; _length = _survivor_length + _scan_only_length; - _survivors_tail->set_next_young_region(_scan_only_head); + _survivor_tail->set_next_young_region(_scan_only_head); } else { _head = _scan_only_head; _length = _scan_only_length; @@ -418,9 +418,9 @@ _curr_scan_only = NULL; _survivor_head = NULL; - _survivors_tail = NULL; + _survivor_tail = NULL; _survivor_length = 0; - _g1h->g1_policy()->finished_recalculating_age_indexes(); + _g1h->g1_policy()->finished_recalculating_age_indexes(false /* is_survivors */); assert(check_list_well_formed(), "young list should be well formed"); } @@ -553,7 +553,7 @@ if (_gc_alloc_region_counts[purpose] < g1_policy()->max_regions(purpose)) { alloc_region = newAllocRegion_work(word_size, true, zero_filled); if (purpose == GCAllocForSurvived && alloc_region != NULL) { - _young_list->add_survivor_region(alloc_region); + alloc_region->set_survivor(); } ++_gc_alloc_region_counts[purpose]; } else { @@ -949,6 +949,10 @@ GCOverheadReporter::recordSTWEnd(end); g1_policy()->record_full_collection_end(); +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + gc_epilogue(true); // Abandon concurrent refinement. This must happen last: in the @@ -2593,6 +2597,9 @@ _young_list->print(); #endif // SCAN_ONLY_VERBOSE + g1_policy()->record_survivor_regions(_young_list->survivor_length(), + _young_list->first_survivor_region(), + _young_list->last_survivor_region()); _young_list->reset_auxilary_lists(); } } else { @@ -2619,7 +2626,9 @@ #endif // SCAN_ONLY_VERBOSE double end_time_sec = os::elapsedTime(); - g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0); + if (!evacuation_failed()) { + g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0); + } GCOverheadReporter::recordSTWEnd(end_time_sec); g1_policy()->record_collection_pause_end(popular_region != NULL, abandoned); @@ -2642,8 +2651,13 @@ } } - if (mark_in_progress()) + if (mark_in_progress()) { concurrent_mark()->update_g1_committed(); + } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif gc_epilogue(false); } @@ -2754,6 +2768,13 @@ _gc_alloc_region_list = r->next_gc_alloc_region(); r->set_next_gc_alloc_region(NULL); r->set_is_gc_alloc_region(false); + if (r->is_survivor()) { + if (r->is_empty()) { + r->set_not_young(); + } else { + _young_list->add_survivor_region(r); + } + } if (r->is_empty()) { ++_free_regions; } @@ -3150,6 +3171,20 @@ return block; } +void G1CollectedHeap::retire_alloc_region(HeapRegion* alloc_region, + bool par) { + // Another thread might have obtained alloc_region for the given + // purpose, and might be attempting to allocate in it, and might + // succeed. Therefore, we can't do the "finalization" stuff on the + // region below until we're sure the last allocation has happened. + // We ensure this by allocating the remaining space with a garbage + // object. + if (par) par_allocate_remaining_space(alloc_region); + // Now we can do the post-GC stuff on the region. + alloc_region->note_end_of_copying(); + g1_policy()->record_after_bytes(alloc_region->used()); +} + HeapWord* G1CollectedHeap::allocate_during_gc_slow(GCAllocPurpose purpose, HeapRegion* alloc_region, @@ -3167,16 +3202,7 @@ // Otherwise, continue; this new region is empty, too. } assert(alloc_region != NULL, "We better have an allocation region"); - // Another thread might have obtained alloc_region for the given - // purpose, and might be attempting to allocate in it, and might - // succeed. Therefore, we can't do the "finalization" stuff on the - // region below until we're sure the last allocation has happened. - // We ensure this by allocating the remaining space with a garbage - // object. - if (par) par_allocate_remaining_space(alloc_region); - // Now we can do the post-GC stuff on the region. - alloc_region->note_end_of_copying(); - g1_policy()->record_after_bytes(alloc_region->used()); + retire_alloc_region(alloc_region, par); if (_gc_alloc_region_counts[purpose] >= g1_policy()->max_regions(purpose)) { // Cannot allocate more regions for the given purpose. @@ -3185,7 +3211,7 @@ if (purpose != alt_purpose) { HeapRegion* alt_region = _gc_alloc_regions[alt_purpose]; // Has not the alternative region been aliased? - if (alloc_region != alt_region) { + if (alloc_region != alt_region && alt_region != NULL) { // Try to allocate in the alternative region. if (par) { block = alt_region->par_allocate(word_size); @@ -3194,9 +3220,10 @@ } // Make an alias. _gc_alloc_regions[purpose] = _gc_alloc_regions[alt_purpose]; - } - if (block != NULL) { - return block; + if (block != NULL) { + return block; + } + retire_alloc_region(alt_region, par); } // Both the allocation region and the alternative one are full // and aliased, replace them with a new allocation region. @@ -3497,6 +3524,7 @@ OverflowQueue* _overflowed_refs; G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount]; + ageTable _age_table; size_t _alloc_buffer_waste; size_t _undo_waste; @@ -3538,6 +3566,7 @@ _refs(g1h->task_queue(queue_num)), _hash_seed(17), _queue_num(queue_num), _term_attempts(0), + _age_table(false), #if G1_DETAILED_STATS _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _overflow_pushes(0), @@ -3572,8 +3601,9 @@ RefToScanQueue* refs() { return _refs; } OverflowQueue* overflowed_refs() { return _overflowed_refs; } - - inline G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { + ageTable* age_table() { return &_age_table; } + + G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) { return &_alloc_buffers[purpose]; } @@ -3834,7 +3864,9 @@ (!from_region->is_young() && young_index == 0), "invariant" ); G1CollectorPolicy* g1p = _g1->g1_policy(); markOop m = old->mark(); - GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, m->age(), + int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age() + : m->age(); + GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age, word_sz); HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz); oop obj = oop(obj_ptr); @@ -3872,9 +3904,12 @@ obj->incr_age(); } else { m = m->incr_age(); + obj->set_mark(m); } + _par_scan_state->age_table()->add(obj, word_sz); + } else { + obj->set_mark(m); } - obj->set_mark(m); // preserve "next" mark bit if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) { @@ -4129,6 +4164,9 @@ _g1h->g1_policy()->record_obj_copy_time(i, elapsed_ms-term_ms); _g1h->g1_policy()->record_termination_time(i, term_ms); } + if (G1UseSurvivorSpace) { + _g1h->g1_policy()->record_thread_age_table(pss.age_table()); + } _g1h->update_surviving_young_words(pss.surviving_young_words()+1); // Clean up any par-expanded rem sets. @@ -4368,7 +4406,7 @@ // Is this the right thing to do here? We don't save marks // on individual heap regions when we allocate from // them in parallel, so this seems like the correct place for this. - all_alloc_regions_note_end_of_copying(); + retire_all_alloc_regions(); { G1IsAliveClosure is_alive(this); G1KeepAliveClosure keep_alive(this); @@ -5008,7 +5046,7 @@ return no_allocs; } -void G1CollectedHeap::all_alloc_regions_note_end_of_copying() { +void G1CollectedHeap::retire_all_alloc_regions() { for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { HeapRegion* r = _gc_alloc_regions[ap]; if (r != NULL) { @@ -5021,8 +5059,7 @@ } } if (!has_processed_alias) { - r->note_end_of_copying(); - g1_policy()->record_after_bytes(r->used()); + retire_alloc_region(r, false /* par */); } } }
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -90,7 +90,7 @@ HeapRegion* _curr_scan_only; HeapRegion* _survivor_head; - HeapRegion* _survivors_tail; + HeapRegion* _survivor_tail; size_t _survivor_length; void empty_list(HeapRegion* list); @@ -105,6 +105,7 @@ bool is_empty() { return _length == 0; } size_t length() { return _length; } size_t scan_only_length() { return _scan_only_length; } + size_t survivor_length() { return _survivor_length; } void rs_length_sampling_init(); bool rs_length_sampling_more(); @@ -120,6 +121,7 @@ HeapRegion* first_region() { return _head; } HeapRegion* first_scan_only_region() { return _scan_only_head; } HeapRegion* first_survivor_region() { return _survivor_head; } + HeapRegion* last_survivor_region() { return _survivor_tail; } HeapRegion* par_get_next_scan_only_region() { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); HeapRegion* ret = _curr_scan_only; @@ -219,7 +221,7 @@ // The to-space memory regions into which objects are being copied during // a GC. HeapRegion* _gc_alloc_regions[GCAllocPurposeCount]; - uint _gc_alloc_region_counts[GCAllocPurposeCount]; + size_t _gc_alloc_region_counts[GCAllocPurposeCount]; // A list of the regions that have been set to be alloc regions in the // current collection. @@ -281,8 +283,8 @@ // Returns "true" iff none of the gc alloc regions have any allocations // since the last call to "save_marks". bool all_alloc_regions_no_allocs_since_save_marks(); - // Calls "note_end_of_copying on all gc alloc_regions. - void all_alloc_regions_note_end_of_copying(); + // Perform finalization stuff on all allocation regions. + void retire_all_alloc_regions(); // The number of regions allocated to hold humongous objects. int _num_humongous_regions; @@ -351,6 +353,10 @@ // that parallel threads might be attempting allocations. void par_allocate_remaining_space(HeapRegion* r); + // Retires an allocation region when it is full or at the end of a + // GC pause. + void retire_alloc_region(HeapRegion* alloc_region, bool par); + // Helper function for two callbacks below. // "full", if true, indicates that the GC is for a System.gc() request, // and should collect the entire heap. If "clear_all_soft_refs" is true,
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -196,8 +196,13 @@ _short_lived_surv_rate_group(new SurvRateGroup(this, "Short Lived", G1YoungSurvRateNumRegionsSummary)), _survivor_surv_rate_group(new SurvRateGroup(this, "Survivor", - G1YoungSurvRateNumRegionsSummary)) + G1YoungSurvRateNumRegionsSummary)), // add here any more surv rate groups + _recorded_survivor_regions(0), + _recorded_survivor_head(NULL), + _recorded_survivor_tail(NULL), + _survivors_age_table(true) + { _recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime()); _prev_collection_pause_end_ms = os::elapsedTime() * 1000.0; @@ -272,6 +277,15 @@ _concurrent_mark_cleanup_times_ms->add(0.20); _tenuring_threshold = MaxTenuringThreshold; + if (G1UseSurvivorSpace) { + // if G1FixedSurvivorSpaceSize is 0 which means the size is not + // fixed, then _max_survivor_regions will be calculated at + // calculate_young_list_target_config during initialization + _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes; + } else { + _max_survivor_regions = 0; + } + initialize_all(); } @@ -283,6 +297,9 @@ void G1CollectorPolicy::initialize_flags() { set_min_alignment(HeapRegion::GrainBytes); set_max_alignment(GenRemSet::max_alignment_constraint(rem_set_name())); + if (SurvivorRatio < 1) { + vm_exit_during_initialization("Invalid survivor ratio specified"); + } CollectorPolicy::initialize_flags(); } @@ -301,6 +318,8 @@ "-XX:+UseConcMarkSweepGC."); } + initialize_gc_policy_counters(); + if (G1Gen) { _in_young_gc_mode = true; @@ -322,6 +341,12 @@ } } +// Create the jstat counters for the policy. +void G1CollectorPolicy::initialize_gc_policy_counters() +{ + _gc_policy_counters = new GCPolicyCounters("GarbageFirst", 1, 2 + G1Gen); +} + void G1CollectorPolicy::calculate_young_list_min_length() { _young_list_min_length = 0; @@ -352,6 +377,7 @@ guarantee( so_length < _young_list_target_length, "invariant" ); _young_list_so_prefix_length = so_length; } + calculate_survivors_policy(); } // This method calculate the optimal scan-only set for a fixed young @@ -448,6 +474,9 @@ if (full_young_gcs() && _free_regions_at_end_of_collection > 0) { // we are in fully-young mode and there are free regions in the heap + double survivor_regions_evac_time = + predict_survivor_regions_evac_time(); + size_t min_so_length = 0; size_t max_so_length = 0; @@ -497,9 +526,8 @@ scanned_cards = predict_non_young_card_num(adj_rs_lengths); // calculate this once, so that we don't have to recalculate it in // the innermost loop - double base_time_ms = predict_base_elapsed_time_ms(pending_cards, - scanned_cards); - + double base_time_ms = predict_base_elapsed_time_ms(pending_cards, scanned_cards) + + survivor_regions_evac_time; // the result size_t final_young_length = 0; size_t final_so_length = 0; @@ -548,14 +576,14 @@ bool done = false; // this is the outermost loop while (!done) { -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("searching between " SIZE_FORMAT " and " SIZE_FORMAT ", incr " SIZE_FORMAT ", pass %s", from_so_length, to_so_length, so_length_incr, (pass == pass_type_coarse) ? "coarse" : (pass == pass_type_fine) ? "fine" : "final"); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG size_t so_length = from_so_length; size_t init_free_regions = @@ -651,11 +679,11 @@ guarantee( so_length_incr == so_coarse_increments, "invariant" ); guarantee( final_so_length >= min_so_length, "invariant" ); -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr(" coarse pass: SO length " SIZE_FORMAT, final_so_length); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG from_so_length = (final_so_length - min_so_length > so_coarse_increments) ? @@ -687,12 +715,12 @@ // of the optimal size_t new_so_length = 950 * final_so_length / 1000; -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr(" fine pass: SO length " SIZE_FORMAT ", setting it to " SIZE_FORMAT, final_so_length, new_so_length); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG from_so_length = new_so_length; to_so_length = new_so_length; @@ -719,7 +747,8 @@ } // we should have at least one region in the target young length - _young_list_target_length = MAX2((size_t) 1, final_young_length); + _young_list_target_length = + MAX2((size_t) 1, final_young_length + _recorded_survivor_regions); if (final_so_length >= final_young_length) // and we need to ensure that the S-O length is not greater than // the target young length (this is being a bit careful) @@ -734,7 +763,7 @@ double end_time_sec = os::elapsedTime(); double elapsed_time_ms = (end_time_sec - start_time_sec) * 1000.0; -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("target = %1.1lf ms, young = " SIZE_FORMAT ", SO = " SIZE_FORMAT ", " @@ -747,9 +776,9 @@ calculations, full_young_gcs() ? "full" : "partial", should_initiate_conc_mark() ? " i-m" : "", - in_marking_window(), - in_marking_window_im()); -#endif // 0 + _in_marking_window, + _in_marking_window_im); +#endif // TRACE_CALC_YOUNG_CONFIG if (_young_list_target_length < _young_list_min_length) { // bummer; this means that, if we do a pause when the optimal @@ -768,14 +797,14 @@ // S-O length so_length = calculate_optimal_so_length(_young_list_min_length); -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("adjusted target length from " SIZE_FORMAT " to " SIZE_FORMAT ", SO " SIZE_FORMAT, _young_list_target_length, _young_list_min_length, so_length); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG _young_list_target_length = MAX2(_young_list_min_length, (size_t)1); @@ -785,12 +814,12 @@ // we are in a partially-young mode or we've run out of regions (due // to evacuation failure) -#if 0 +#ifdef TRACE_CALC_YOUNG_CONFIG // leave this in for debugging, just in case gclog_or_tty->print_cr("(partial) setting target to " SIZE_FORMAT ", SO " SIZE_FORMAT, _young_list_min_length, 0); -#endif // 0 +#endif // TRACE_CALC_YOUNG_CONFIG // we'll do the pause as soon as possible and with no S-O prefix // (see above for the reasons behind the latter) @@ -884,6 +913,16 @@ return true; } +double G1CollectorPolicy::predict_survivor_regions_evac_time() { + double survivor_regions_evac_time = 0.0; + for (HeapRegion * r = _recorded_survivor_head; + r != NULL && r != _recorded_survivor_tail->get_next_young_region(); + r = r->get_next_young_region()) { + survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true); + } + return survivor_regions_evac_time; +} + void G1CollectorPolicy::check_prediction_validity() { guarantee( adaptive_young_list_length(), "should not call this otherwise" ); @@ -995,11 +1034,15 @@ _short_lived_surv_rate_group->start_adding_regions(); // also call this on any additional surv rate groups + record_survivor_regions(0, NULL, NULL); + _prev_region_num_young = _region_num_young; _prev_region_num_tenured = _region_num_tenured; _free_regions_at_end_of_collection = _g1->free_regions(); _scan_only_regions_at_end_of_collection = 0; + // Reset survivors SurvRateGroup. + _survivor_surv_rate_group->reset(); calculate_young_list_min_length(); calculate_young_list_target_config(); } @@ -1104,6 +1147,10 @@ _short_lived_surv_rate_group->record_scan_only_prefix(short_lived_so_length); tag_scan_only(short_lived_so_length); + if (G1UseSurvivorSpace) { + _survivors_age_table.clear(); + } + assert( verify_young_ages(), "region age verification" ); } @@ -1965,9 +2012,6 @@ // </NEW PREDICTION> _target_pause_time_ms = -1.0; - - // TODO: calculate tenuring threshold - _tenuring_threshold = MaxTenuringThreshold; } // <NEW PREDICTION> @@ -2058,7 +2102,7 @@ guarantee( hr->is_young() && hr->age_in_surv_rate_group() != -1, "invariant" ); int age = hr->age_in_surv_rate_group(); - double yg_surv_rate = predict_yg_surv_rate(age); + double yg_surv_rate = predict_yg_surv_rate(age, hr->surv_rate_group()); bytes_to_copy = (size_t) ((double) hr->used() * yg_surv_rate); } @@ -2091,7 +2135,7 @@ } #if PREDICTIONS_VERBOSE if (young) { - _recorded_young_bytes += hr->asSpace()->used(); + _recorded_young_bytes += hr->used(); } else { _recorded_marked_bytes += hr->max_live_bytes(); } @@ -2119,11 +2163,6 @@ predict_non_young_card_num(_predicted_rs_lengths); _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions; - _predicted_young_survival_ratio = 0.0; - for (int i = 0; i < _recorded_young_regions; ++i) - _predicted_young_survival_ratio += predict_yg_surv_rate(i); - _predicted_young_survival_ratio /= (double) _recorded_young_regions; - _predicted_scan_only_scan_time_ms = predict_scan_only_time_ms(_recorded_scan_only_regions); _predicted_rs_update_time_ms = @@ -2673,8 +2712,11 @@ assert(in_young_gc_mode(), "should be in young GC mode"); bool ret; size_t young_list_length = _g1->young_list_length(); - - if (young_list_length < _young_list_target_length) { + size_t young_list_max_length = _young_list_target_length; + if (G1FixedEdenSize) { + young_list_max_length -= _max_survivor_regions; + } + if (young_list_length < young_list_max_length) { ret = true; ++_region_num_young; } else { @@ -2710,17 +2752,39 @@ } -uint G1CollectorPolicy::max_regions(int purpose) { +size_t G1CollectorPolicy::max_regions(int purpose) { switch (purpose) { case GCAllocForSurvived: - return G1MaxSurvivorRegions; + return _max_survivor_regions; case GCAllocForTenured: - return UINT_MAX; + return REGIONS_UNLIMITED; default: - return UINT_MAX; + ShouldNotReachHere(); + return REGIONS_UNLIMITED; }; } +// Calculates survivor space parameters. +void G1CollectorPolicy::calculate_survivors_policy() +{ + if (!G1UseSurvivorSpace) { + return; + } + if (G1FixedSurvivorSpaceSize == 0) { + _max_survivor_regions = _young_list_target_length / SurvivorRatio; + } else { + _max_survivor_regions = G1FixedSurvivorSpaceSize / HeapRegion::GrainBytes; + } + + if (G1FixedTenuringThreshold) { + _tenuring_threshold = MaxTenuringThreshold; + } else { + _tenuring_threshold = _survivors_age_table.compute_tenuring_threshold( + HeapRegion::GrainWords * _max_survivor_regions); + } +} + + void G1CollectorPolicy_BestRegionsFirst:: set_single_region_collection_set(HeapRegion* hr) { @@ -2743,7 +2807,11 @@ double max_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; size_t young_list_length = _g1->young_list_length(); - bool reached_target_length = young_list_length >= _young_list_target_length; + size_t young_list_max_length = _young_list_target_length; + if (G1FixedEdenSize) { + young_list_max_length -= _max_survivor_regions; + } + bool reached_target_length = young_list_length >= young_list_max_length; if (in_young_gc_mode()) { if (reached_target_length) {
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -49,7 +49,7 @@ class MainBodySummary; class PopPreambleSummary; -class PauseSummary { +class PauseSummary: public CHeapObj { define_num_seq(total) define_num_seq(other) @@ -58,7 +58,7 @@ virtual PopPreambleSummary* pop_preamble_summary() { return NULL; } }; -class MainBodySummary { +class MainBodySummary: public CHeapObj { define_num_seq(satb_drain) // optional define_num_seq(parallel) // parallel only define_num_seq(ext_root_scan) @@ -75,7 +75,7 @@ define_num_seq(clear_ct) // parallel only }; -class PopPreambleSummary { +class PopPreambleSummary: public CHeapObj { define_num_seq(pop_preamble) define_num_seq(pop_update_rs) define_num_seq(pop_scan_rs) @@ -557,6 +557,8 @@ return get_new_neg_prediction(_young_gc_eff_seq); } + double predict_survivor_regions_evac_time(); + // </NEW PREDICTION> public: @@ -599,8 +601,8 @@ // Returns an estimate of the survival rate of the region at yg-age // "yg_age". - double predict_yg_surv_rate(int age) { - TruncatedSeq* seq = _short_lived_surv_rate_group->get_seq(age); + double predict_yg_surv_rate(int age, SurvRateGroup* surv_rate_group) { + TruncatedSeq* seq = surv_rate_group->get_seq(age); if (seq->num() == 0) gclog_or_tty->print("BARF! age is %d", age); guarantee( seq->num() > 0, "invariant" ); @@ -610,6 +612,10 @@ return pred; } + double predict_yg_surv_rate(int age) { + return predict_yg_surv_rate(age, _short_lived_surv_rate_group); + } + double accum_yg_surv_rate_pred(int age) { return _short_lived_surv_rate_group->accum_surv_rate_pred(age); } @@ -822,6 +828,9 @@ virtual void init(); + // Create jstat counters for the policy. + virtual void initialize_gc_policy_counters(); + virtual HeapWord* mem_allocate_work(size_t size, bool is_tlab, bool* gc_overhead_limit_was_exceeded); @@ -1047,8 +1056,12 @@ // Print stats on young survival ratio void print_yg_surv_rate_info() const; - void finished_recalculating_age_indexes() { - _short_lived_surv_rate_group->finished_recalculating_age_indexes(); + void finished_recalculating_age_indexes(bool is_survivors) { + if (is_survivors) { + _survivor_surv_rate_group->finished_recalculating_age_indexes(); + } else { + _short_lived_surv_rate_group->finished_recalculating_age_indexes(); + } // do that for any other surv rate groups } @@ -1097,6 +1110,17 @@ // maximum amount of suvivors regions. int _tenuring_threshold; + // The limit on the number of regions allocated for survivors. + size_t _max_survivor_regions; + + // The amount of survor regions after a collection. + size_t _recorded_survivor_regions; + // List of survivor regions. + HeapRegion* _recorded_survivor_head; + HeapRegion* _recorded_survivor_tail; + + ageTable _survivors_age_table; + public: inline GCAllocPurpose @@ -1116,7 +1140,9 @@ return GCAllocForTenured; } - uint max_regions(int purpose); + static const size_t REGIONS_UNLIMITED = ~(size_t)0; + + size_t max_regions(int purpose); // The limit on regions for a particular purpose is reached. void note_alloc_region_limit_reached(int purpose) { @@ -1132,6 +1158,23 @@ void note_stop_adding_survivor_regions() { _survivor_surv_rate_group->stop_adding_regions(); } + + void record_survivor_regions(size_t regions, + HeapRegion* head, + HeapRegion* tail) { + _recorded_survivor_regions = regions; + _recorded_survivor_head = head; + _recorded_survivor_tail = tail; + } + + void record_thread_age_table(ageTable* age_table) + { + _survivors_age_table.merge_par(age_table); + } + + // Calculates survivor space parameters. + void calculate_survivors_policy(); + }; // This encapsulates a particular strategy for a g1 Collector.
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -28,7 +28,7 @@ /***** ALL TIMES ARE IN SECS!!!!!!! *****/ // this is the "interface" -class G1MMUTracker { +class G1MMUTracker: public CHeapObj { protected: double _time_slice; double _max_gc_time; // this is per time slice @@ -67,7 +67,7 @@ } }; -class G1MMUTrackerQueueElem { +class G1MMUTrackerQueueElem VALUE_OBJ_CLASS_SPEC { private: double _start_time; double _end_time;
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -572,6 +572,9 @@ } guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); + for (uint i = 0; i < n_workers(); ++i) { + _cards_scanned[i] = 0; + } _total_cards_scanned = 0; }
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -30,7 +30,7 @@ class HRInto_G1RemSet; class ConcurrentG1Refine; -class G1RemSet { +class G1RemSet: public CHeapObj { protected: G1CollectedHeap* _g1;
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -281,7 +281,17 @@ develop(bool, G1HRRSFlushLogBuffersOnVerify, false, \ "Forces flushing of log buffers before verification.") \ \ - product(intx, G1MaxSurvivorRegions, 0, \ - "The maximum number of survivor regions") + product(bool, G1UseSurvivorSpace, true, \ + "When true, use survivor space.") \ + \ + product(bool, G1FixedTenuringThreshold, false, \ + "When set, G1 will not adjust the tenuring threshold") \ + \ + product(bool, G1FixedEdenSize, false, \ + "When set, G1 will not allocate unused survivor space regions") \ + \ + product(uintx, G1FixedSurvivorSpaceSize, 0, \ + "If non-0 is the size of the G1 survivor space, " \ + "otherwise SurvivorRatio is used to determine the size") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -566,7 +566,11 @@ void note_end_of_copying() { assert(top() >= _next_top_at_mark_start, "Increase only"); - _next_top_at_mark_start = top(); + // Survivor regions will be scanned on the start of concurrent + // marking. + if (!is_survivor()) { + _next_top_at_mark_start = top(); + } } // Returns "false" iff no object in the region was allocated when the @@ -829,7 +833,7 @@ // A linked lists of heap regions. It leaves the "next" field // unspecified; that's up to subtypes. -class RegionList { +class RegionList VALUE_OBJ_CLASS_SPEC { protected: virtual HeapRegion* get_next(HeapRegion* chr) = 0; virtual void set_next(HeapRegion* chr,
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -65,9 +65,11 @@ // We need access in order to union things into the base table. BitMap* bm() { return &_bm; } +#if PRT_COUNT_OCCUPIED void recount_occupied() { _occupied = (jint) bm()->count_one_bits(); } +#endif PerRegionTable(HeapRegion* hr) : _hr(hr), @@ -1144,7 +1146,9 @@ size_t i = _outgoing_region_map.get_next_one_offset(0); while (i < _outgoing_region_map.size()) { HeapRegion* to_region = g1h->region_at(i); - to_region->rem_set()->clear_incoming_entry(hr()); + if (!to_region->in_collection_set()) { + to_region->rem_set()->clear_incoming_entry(hr()); + } i = _outgoing_region_map.get_next_one_offset(i+1); } }
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -58,7 +58,7 @@ // is represented. If a deleted PRT is re-used, a thread adding a bit, // thinking the PRT is for a different region, does no harm. -class OtherRegionsTable: public CHeapObj { +class OtherRegionsTable VALUE_OBJ_CLASS_SPEC { friend class HeapRegionRemSetIterator; G1CollectedHeap* _g1h;
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -29,7 +29,7 @@ class PtrQueueSet; -class PtrQueue: public CHeapObj { +class PtrQueue VALUE_OBJ_CLASS_SPEC { protected: // The ptr queue set to which this queue belongs. @@ -130,7 +130,7 @@ // In particular, the individual queues allocate buffers from this shared // set, and return completed buffers to the set. // All these variables are are protected by the TLOQ_CBL_mon. XXX ??? -class PtrQueueSet: public CHeapObj { +class PtrQueueSet VALUE_OBJ_CLASS_SPEC { protected:
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -33,7 +33,7 @@ // old versions synchronously. -class SparsePRTEntry { +class SparsePRTEntry: public CHeapObj { public: enum SomePublicConstants { CardsPerEntry = (short)4, @@ -167,7 +167,7 @@ }; // ValueObj because will be embedded in HRRS iterator. -class RSHashTableIter: public CHeapObj { +class RSHashTableIter VALUE_OBJ_CLASS_SPEC { short _tbl_ind; short _bl_ind; short _card_ind; @@ -213,7 +213,7 @@ class SparsePRTIter; -class SparsePRT : public CHeapObj { +class SparsePRT VALUE_OBJ_CLASS_SPEC { // Iterations are done on the _cur hash table, since they only need to // see entries visible at the start of a collection pause. // All other operations are done using the _next hash table.
--- a/src/share/vm/gc_implementation/g1/survRateGroup.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -29,23 +29,14 @@ const char* name, size_t summary_surv_rates_len) : _g1p(g1p), _name(name), - _all_regions_allocated(0), - _curr_length(0), _scan_only_prefix(0), _setup_seq_num(0), - _array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL), - _accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0), _summary_surv_rates_len(summary_surv_rates_len), _summary_surv_rates_max_len(0), - _summary_surv_rates(NULL) { - - // the following will set up the arrays with length 1 - _curr_length = 1; - stop_adding_regions(); - guarantee( _array_length == 1, "invariant" ); - guarantee( _surv_rate_pred[0] != NULL, "invariant" ); - _surv_rate_pred[0]->add(0.4); - all_surviving_words_recorded(false); - _curr_length = 0; - + _summary_surv_rates(NULL), + _surv_rate(NULL), + _accum_surv_rate_pred(NULL), + _surv_rate_pred(NULL) +{ + reset(); if (summary_surv_rates_len > 0) { size_t length = summary_surv_rates_len; _summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length); @@ -60,61 +51,80 @@ start_adding_regions(); } + +void SurvRateGroup::reset() +{ + _all_regions_allocated = 0; + _scan_only_prefix = 0; + _setup_seq_num = 0; + _stats_arrays_length = 0; + _accum_surv_rate = 0.0; + _last_pred = 0.0; + // the following will set up the arrays with length 1 + _region_num = 1; + stop_adding_regions(); + guarantee( _stats_arrays_length == 1, "invariant" ); + guarantee( _surv_rate_pred[0] != NULL, "invariant" ); + _surv_rate_pred[0]->add(0.4); + all_surviving_words_recorded(false); + _region_num = 0; +} + + void SurvRateGroup::start_adding_regions() { - _setup_seq_num = _array_length; - _curr_length = _scan_only_prefix; + _setup_seq_num = _stats_arrays_length; + _region_num = _scan_only_prefix; _accum_surv_rate = 0.0; #if 0 - gclog_or_tty->print_cr("start adding regions, seq num %d, length %d", - _setup_seq_num, _curr_length); + gclog_or_tty->print_cr("[%s] start adding regions, seq num %d, length %d", + _name, _setup_seq_num, _region_num); #endif // 0 } void SurvRateGroup::stop_adding_regions() { - size_t length = _curr_length; #if 0 - gclog_or_tty->print_cr("stop adding regions, length %d", length); + gclog_or_tty->print_cr("[%s] stop adding regions, length %d", _name, _region_num); #endif // 0 - if (length > _array_length) { + if (_region_num > _stats_arrays_length) { double* old_surv_rate = _surv_rate; double* old_accum_surv_rate_pred = _accum_surv_rate_pred; TruncatedSeq** old_surv_rate_pred = _surv_rate_pred; - _surv_rate = NEW_C_HEAP_ARRAY(double, length); + _surv_rate = NEW_C_HEAP_ARRAY(double, _region_num); if (_surv_rate == NULL) { - vm_exit_out_of_memory(sizeof(double) * length, + vm_exit_out_of_memory(sizeof(double) * _region_num, "Not enough space for surv rate array."); } - _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length); + _accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, _region_num); if (_accum_surv_rate_pred == NULL) { - vm_exit_out_of_memory(sizeof(double) * length, + vm_exit_out_of_memory(sizeof(double) * _region_num, "Not enough space for accum surv rate pred array."); } - _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length); + _surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, _region_num); if (_surv_rate == NULL) { - vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length, + vm_exit_out_of_memory(sizeof(TruncatedSeq*) * _region_num, "Not enough space for surv rate pred array."); } - for (size_t i = 0; i < _array_length; ++i) + for (size_t i = 0; i < _stats_arrays_length; ++i) _surv_rate_pred[i] = old_surv_rate_pred[i]; #if 0 - gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d", - _array_length, length - 1); + gclog_or_tty->print_cr("[%s] stop adding regions, new seqs %d to %d", + _name, _array_length, _region_num - 1); #endif // 0 - for (size_t i = _array_length; i < length; ++i) { + for (size_t i = _stats_arrays_length; i < _region_num; ++i) { _surv_rate_pred[i] = new TruncatedSeq(10); // _surv_rate_pred[i]->add(last_pred); } - _array_length = length; + _stats_arrays_length = _region_num; if (old_surv_rate != NULL) FREE_C_HEAP_ARRAY(double, old_surv_rate); @@ -124,7 +134,7 @@ FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred); } - for (size_t i = 0; i < _array_length; ++i) + for (size_t i = 0; i < _stats_arrays_length; ++i) _surv_rate[i] = 0.0; } @@ -135,7 +145,7 @@ double ret = _accum_surv_rate; if (adjustment > 0) { - TruncatedSeq* seq = get_seq(_curr_length+1); + TruncatedSeq* seq = get_seq(_region_num+1); double surv_rate = _g1p->get_new_prediction(seq); ret += surv_rate; } @@ -145,23 +155,23 @@ int SurvRateGroup::next_age_index() { - TruncatedSeq* seq = get_seq(_curr_length); + TruncatedSeq* seq = get_seq(_region_num); double surv_rate = _g1p->get_new_prediction(seq); _accum_surv_rate += surv_rate; - ++_curr_length; + ++_region_num; return (int) ++_all_regions_allocated; } void SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) { - guarantee( scan_only_prefix <= _curr_length, "pre-condition" ); + guarantee( scan_only_prefix <= _region_num, "pre-condition" ); _scan_only_prefix = scan_only_prefix; } void SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) { - guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length, + guarantee( 0 <= age_in_group && (size_t) age_in_group < _region_num, "pre-condition" ); guarantee( _surv_rate[age_in_group] <= 0.00001, "should only update each slot once" ); @@ -178,15 +188,15 @@ void SurvRateGroup::all_surviving_words_recorded(bool propagate) { - if (propagate && _curr_length > 0) { // conservative - double surv_rate = _surv_rate_pred[_curr_length-1]->last(); + if (propagate && _region_num > 0) { // conservative + double surv_rate = _surv_rate_pred[_region_num-1]->last(); #if 0 gclog_or_tty->print_cr("propagating %1.2lf from %d to %d", surv_rate, _curr_length, _array_length - 1); #endif // 0 - for (size_t i = _curr_length; i < _array_length; ++i) { + for (size_t i = _region_num; i < _stats_arrays_length; ++i) { guarantee( _surv_rate[i] <= 0.00001, "the slot should not have been updated" ); _surv_rate_pred[i]->add(surv_rate); @@ -195,7 +205,7 @@ double accum = 0.0; double pred = 0.0; - for (size_t i = 0; i < _array_length; ++i) { + for (size_t i = 0; i < _stats_arrays_length; ++i) { pred = _g1p->get_new_prediction(_surv_rate_pred[i]); if (pred > 1.0) pred = 1.0; accum += pred; @@ -209,8 +219,8 @@ void SurvRateGroup::print() { gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)", - _name, _curr_length, _scan_only_prefix); - for (size_t i = 0; i < _curr_length; ++i) { + _name, _region_num, _scan_only_prefix); + for (size_t i = 0; i < _region_num; ++i) { gclog_or_tty->print_cr(" age %4d surv rate %6.2lf %% pred %6.2lf %%%s", i, _surv_rate[i] * 100.0, _g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
--- a/src/share/vm/gc_implementation/g1/survRateGroup.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/survRateGroup.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -29,7 +29,7 @@ G1CollectorPolicy* _g1p; const char* _name; - size_t _array_length; + size_t _stats_arrays_length; double* _surv_rate; double* _accum_surv_rate_pred; double _last_pred; @@ -40,7 +40,7 @@ size_t _summary_surv_rates_max_len; int _all_regions_allocated; - size_t _curr_length; + size_t _region_num; size_t _scan_only_prefix; size_t _setup_seq_num; @@ -48,6 +48,7 @@ SurvRateGroup(G1CollectorPolicy* g1p, const char* name, size_t summary_surv_rates_len); + void reset(); void start_adding_regions(); void stop_adding_regions(); void record_scan_only_prefix(size_t scan_only_prefix); @@ -55,22 +56,21 @@ void all_surviving_words_recorded(bool propagate); const char* name() { return _name; } - size_t region_num() { return _curr_length; } + size_t region_num() { return _region_num; } size_t scan_only_length() { return _scan_only_prefix; } double accum_surv_rate_pred(int age) { assert(age >= 0, "must be"); - if ((size_t)age < _array_length) + if ((size_t)age < _stats_arrays_length) return _accum_surv_rate_pred[age]; else { - double diff = (double) (age - _array_length + 1); - return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred; + double diff = (double) (age - _stats_arrays_length + 1); + return _accum_surv_rate_pred[_stats_arrays_length-1] + diff * _last_pred; } } double accum_surv_rate(size_t adjustment); TruncatedSeq* get_seq(size_t age) { - guarantee( 0 <= age, "pre-condition" ); if (age >= _setup_seq_num) { guarantee( _setup_seq_num > 0, "invariant" ); age = _setup_seq_num-1;
--- a/src/share/vm/gc_implementation/includeDB_gc_g1 Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Sun Feb 15 20:09:02 2009 -0800 @@ -48,6 +48,7 @@ concurrentG1Refine.cpp space.inline.hpp concurrentG1Refine.hpp globalDefinitions.hpp +concurrentG1Refine.hpp allocation.hpp concurrentG1RefineThread.cpp concurrentG1Refine.hpp concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp @@ -172,6 +173,7 @@ g1CollectorPolicy.cpp g1CollectorPolicy.hpp g1CollectorPolicy.cpp heapRegionRemSet.hpp g1CollectorPolicy.cpp mutexLocker.hpp +g1CollectorPolicy.cpp gcPolicyCounters.hpp g1CollectorPolicy.hpp collectorPolicy.hpp g1CollectorPolicy.hpp collectionSetChooser.hpp @@ -228,7 +230,7 @@ g1MMUTracker.cpp mutexLocker.hpp g1MMUTracker.hpp debug.hpp - +g1MMUTracker.hpp allocation.hpp g1RemSet.cpp bufferingOopClosure.hpp g1RemSet.cpp concurrentG1Refine.hpp g1RemSet.cpp concurrentG1RefineThread.hpp @@ -272,6 +274,7 @@ heapRegion.hpp watermark.hpp heapRegion.hpp g1_specialized_oop_closures.hpp heapRegion.hpp survRateGroup.hpp +heapRegion.hpp ageTable.hpp heapRegionRemSet.hpp sparsePRT.hpp
--- a/src/share/vm/gc_implementation/includeDB_gc_parNew Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/includeDB_gc_parNew Sun Feb 15 20:09:02 2009 -0800 @@ -79,6 +79,7 @@ parNewGeneration.cpp sharedHeap.hpp parNewGeneration.cpp space.hpp parNewGeneration.cpp spaceDecorator.hpp +parNewGeneration.cpp thread.hpp parNewGeneration.cpp workgroup.hpp parNewGeneration.hpp defNewGeneration.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -404,6 +404,8 @@ if (terminator()->offer_termination()) break; par_scan_state()->end_term_time(); } + assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0, + "Broken overflow list?"); // Finish the last termination pause. par_scan_state()->end_term_time(); } @@ -456,6 +458,8 @@ _is_alive_closure(this), _plab_stats(YoungPLABSize, PLABWeight) { + NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;) + NOT_PRODUCT(_num_par_pushes = 0;) _task_queues = new ObjToScanQueueSet(ParallelGCThreads); guarantee(_task_queues != NULL, "task_queues allocation failure."); @@ -993,12 +997,19 @@ "push forwarded object"); } // Push it on one of the queues of to-be-scanned objects. - if (!par_scan_state->work_queue()->push(obj_to_push)) { + bool simulate_overflow = false; + NOT_PRODUCT( + if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) { + // simulate a stack overflow + simulate_overflow = true; + } + ) + if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { // Add stats for overflow pushes. if (Verbose && PrintGCDetails) { gclog_or_tty->print("queue overflow!\n"); } - push_on_overflow_list(old); + push_on_overflow_list(old, par_scan_state); par_scan_state->note_overflow_push(); } par_scan_state->note_push(); @@ -1110,9 +1121,16 @@ "push forwarded object"); } // Push it on one of the queues of to-be-scanned objects. - if (!par_scan_state->work_queue()->push(obj_to_push)) { + bool simulate_overflow = false; + NOT_PRODUCT( + if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) { + // simulate a stack overflow + simulate_overflow = true; + } + ) + if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { // Add stats for overflow pushes. - push_on_overflow_list(old); + push_on_overflow_list(old, par_scan_state); par_scan_state->note_overflow_push(); } par_scan_state->note_push(); @@ -1135,89 +1153,190 @@ return forward_ptr; } -void ParNewGeneration::push_on_overflow_list(oop from_space_obj) { - oop cur_overflow_list = _overflow_list; +#ifndef PRODUCT +// It's OK to call this multi-threaded; the worst thing +// that can happen is that we'll get a bunch of closely +// spaced simulated oveflows, but that's OK, in fact +// probably good as it would exercise the overflow code +// under contention. +bool ParNewGeneration::should_simulate_overflow() { + if (_overflow_counter-- <= 0) { // just being defensive + _overflow_counter = ParGCWorkQueueOverflowInterval; + return true; + } else { + return false; + } +} +#endif + +#define BUSY (oop(0x1aff1aff)) +void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { // if the object has been forwarded to itself, then we cannot // use the klass pointer for the linked list. Instead we have // to allocate an oopDesc in the C-Heap and use that for the linked list. + // XXX This is horribly inefficient when a promotion failure occurs + // and should be fixed. XXX FIX ME !!! +#ifndef PRODUCT + Atomic::inc_ptr(&_num_par_pushes); + assert(_num_par_pushes > 0, "Tautology"); +#endif if (from_space_obj->forwardee() == from_space_obj) { oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1); listhead->forward_to(from_space_obj); from_space_obj = listhead; } - while (true) { - from_space_obj->set_klass_to_list_ptr(cur_overflow_list); - oop observed_overflow_list = + oop observed_overflow_list = _overflow_list; + oop cur_overflow_list; + do { + cur_overflow_list = observed_overflow_list; + if (cur_overflow_list != BUSY) { + from_space_obj->set_klass_to_list_ptr(cur_overflow_list); + } else { + from_space_obj->set_klass_to_list_ptr(NULL); + } + observed_overflow_list = (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list); - if (observed_overflow_list == cur_overflow_list) break; - // Otherwise... - cur_overflow_list = observed_overflow_list; - } + } while (cur_overflow_list != observed_overflow_list); } +// *NOTE*: The overflow list manipulation code here and +// in CMSCollector:: are very similar in shape, +// except that in the CMS case we thread the objects +// directly into the list via their mark word, and do +// not need to deal with special cases below related +// to chunking of object arrays and promotion failure +// handling. +// CR 6797058 has been filed to attempt consolidation of +// the common code. +// Because of the common code, if you make any changes in +// the code below, please check the CMS version to see if +// similar changes might be needed. +// See CMSCollector::par_take_from_overflow_list() for +// more extensive documentation comments. bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { ObjToScanQueue* work_q = par_scan_state->work_queue(); + assert(work_q->size() == 0, "Should first empty local work queue"); // How many to take? - int objsFromOverflow = MIN2(work_q->max_elems()/4, - (juint)ParGCDesiredObjsFromOverflowList); + size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4, + (size_t)ParGCDesiredObjsFromOverflowList); if (_overflow_list == NULL) return false; // Otherwise, there was something there; try claiming the list. - oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list); - - if (prefix == NULL) { - return false; + oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); + // Trim off a prefix of at most objsFromOverflow items + Thread* tid = Thread::current(); + size_t spin_count = (size_t)ParallelGCThreads; + size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100); + for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) { + // someone grabbed it before we did ... + // ... we spin for a short while... + os::sleep(tid, sleep_time_millis, false); + if (_overflow_list == NULL) { + // nothing left to take + return false; + } else if (_overflow_list != BUSY) { + // try and grab the prefix + prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); + } } - // Trim off a prefix of at most objsFromOverflow items - int i = 1; + if (prefix == NULL || prefix == BUSY) { + // Nothing to take or waited long enough + if (prefix == NULL) { + // Write back the NULL in case we overwrote it with BUSY above + // and it is still the same value. + (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); + } + return false; + } + assert(prefix != NULL && prefix != BUSY, "Error"); + size_t i = 1; oop cur = prefix; while (i < objsFromOverflow && cur->klass_or_null() != NULL) { i++; cur = oop(cur->klass()); } // Reattach remaining (suffix) to overflow list - if (cur->klass_or_null() != NULL) { - oop suffix = oop(cur->klass()); - cur->set_klass_to_list_ptr(NULL); - - // Find last item of suffix list - oop last = suffix; - while (last->klass_or_null() != NULL) { - last = oop(last->klass()); + if (cur->klass_or_null() == NULL) { + // Write back the NULL in lieu of the BUSY we wrote + // above and it is still the same value. + if (_overflow_list == BUSY) { + (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); } - // Atomically prepend suffix to current overflow list - oop cur_overflow_list = _overflow_list; - while (true) { - last->set_klass_to_list_ptr(cur_overflow_list); - oop observed_overflow_list = - (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list); - if (observed_overflow_list == cur_overflow_list) break; - // Otherwise... - cur_overflow_list = observed_overflow_list; + } else { + assert(cur->klass_or_null() != BUSY, "Error"); + oop suffix = oop(cur->klass()); // suffix will be put back on global list + cur->set_klass_to_list_ptr(NULL); // break off suffix + // It's possible that the list is still in the empty(busy) state + // we left it in a short while ago; in that case we may be + // able to place back the suffix. + oop observed_overflow_list = _overflow_list; + oop cur_overflow_list = observed_overflow_list; + bool attached = false; + while (observed_overflow_list == BUSY || observed_overflow_list == NULL) { + observed_overflow_list = + (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list); + if (cur_overflow_list == observed_overflow_list) { + attached = true; + break; + } else cur_overflow_list = observed_overflow_list; + } + if (!attached) { + // Too bad, someone else got in in between; we'll need to do a splice. + // Find the last item of suffix list + oop last = suffix; + while (last->klass_or_null() != NULL) { + last = oop(last->klass()); + } + // Atomically prepend suffix to current overflow list + observed_overflow_list = _overflow_list; + do { + cur_overflow_list = observed_overflow_list; + if (cur_overflow_list != BUSY) { + // Do the splice ... + last->set_klass_to_list_ptr(cur_overflow_list); + } else { // cur_overflow_list == BUSY + last->set_klass_to_list_ptr(NULL); + } + observed_overflow_list = + (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list); + } while (cur_overflow_list != observed_overflow_list); } } // Push objects on prefix list onto this thread's work queue - assert(cur != NULL, "program logic"); + assert(prefix != NULL && prefix != BUSY, "program logic"); cur = prefix; - int n = 0; + ssize_t n = 0; while (cur != NULL) { oop obj_to_push = cur->forwardee(); oop next = oop(cur->klass_or_null()); cur->set_klass(obj_to_push->klass()); - if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) { - obj_to_push = cur; + // This may be an array object that is self-forwarded. In that case, the list pointer + // space, cur, is not in the Java heap, but rather in the C-heap and should be freed. + if (!is_in_reserved(cur)) { + // This can become a scaling bottleneck when there is work queue overflow coincident + // with promotion failure. + oopDesc* f = cur; + FREE_C_HEAP_ARRAY(oopDesc, f); + } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) { assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned"); + obj_to_push = cur; } - work_q->push(obj_to_push); + bool ok = work_q->push(obj_to_push); + assert(ok, "Should have succeeded"); cur = next; n++; } par_scan_state->note_overflow_refill(n); +#ifndef PRODUCT + assert(_num_par_pushes >= n, "Too many pops?"); + Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes); +#endif return true; } +#undef BUSY void ParNewGeneration::ref_processor_init() {
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -278,6 +278,7 @@ friend class ParNewRefProcTask; friend class ParNewRefProcTaskExecutor; friend class ParScanThreadStateSet; + friend class ParEvacuateFollowersClosure; private: // XXX use a global constant instead of 64! @@ -296,6 +297,7 @@ // klass-pointers (klass information already copied to the forwarded // image.) Manipulated with CAS. oop _overflow_list; + NOT_PRODUCT(ssize_t _num_par_pushes;) // If true, older generation does not support promotion undo, so avoid. static bool _avoid_promotion_undo; @@ -372,8 +374,12 @@ oop copy_to_survivor_space_with_undo(ParScanThreadState* par_scan_state, oop obj, size_t obj_sz, markOop m); + // in support of testing overflow code + NOT_PRODUCT(int _overflow_counter;) + NOT_PRODUCT(bool should_simulate_overflow();) + // Push the given (from-space) object on the global overflow list. - void push_on_overflow_list(oop from_space_obj); + void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state); // If the global overflow list is non-empty, move some tasks from it // onto "work_q" (which must be empty). No more than 1/4 of the
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -362,6 +362,10 @@ if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif } bool PSMarkSweep::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -116,7 +116,7 @@ // ObjectSpace stuff // - _object_space = new MutableSpace(); + _object_space = new MutableSpace(virtual_space()->alignment()); if (_object_space == NULL) vm_exit_during_initialization("Could not allocate an old gen space"); @@ -385,10 +385,10 @@ start_array()->set_covered_region(new_memregion); Universe::heap()->barrier_set()->resize_covered_region(new_memregion); - HeapWord* const virtual_space_high = (HeapWord*) virtual_space()->high(); - // ALWAYS do this last!! - object_space()->set_end(virtual_space_high); + object_space()->initialize(new_memregion, + SpaceDecorator::DontClear, + SpaceDecorator::DontMangle); assert(new_word_size == heap_word_size(object_space()->capacity_in_bytes()), "Sanity");
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -2203,6 +2203,10 @@ collection_exit.ticks()); gc_task_manager()->print_task_time_stamps(); } + +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif } bool PSParallelCompact::absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -615,6 +615,10 @@ gc_task_manager()->print_task_time_stamps(); } +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + return !promotion_failure_occurred; }
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -78,7 +78,7 @@ _special = false; } -bool PSVirtualSpace::expand_by(size_t bytes, bool pre_touch) { +bool PSVirtualSpace::expand_by(size_t bytes) { assert(is_aligned(bytes), "arg not aligned"); DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this)); @@ -92,15 +92,6 @@ _committed_high_addr += bytes; } - if (pre_touch || AlwaysPreTouch) { - for (char* curr = base_addr; - curr < _committed_high_addr; - curr += os::vm_page_size()) { - char tmp = *curr; - *curr = 0; - } - } - return result; } @@ -255,7 +246,7 @@ DEBUG_ONLY(verify()); } -bool PSVirtualSpaceHighToLow::expand_by(size_t bytes, bool pre_touch) { +bool PSVirtualSpaceHighToLow::expand_by(size_t bytes) { assert(is_aligned(bytes), "arg not aligned"); DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this)); @@ -269,15 +260,6 @@ _committed_low_addr -= bytes; } - if (pre_touch || AlwaysPreTouch) { - for (char* curr = base_addr; - curr < _committed_high_addr; - curr += os::vm_page_size()) { - char tmp = *curr; - *curr = 0; - } - } - return result; }
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -80,7 +80,7 @@ inline void set_reserved(char* low_addr, char* high_addr, bool special); inline void set_reserved(ReservedSpace rs); inline void set_committed(char* low_addr, char* high_addr); - virtual bool expand_by(size_t bytes, bool pre_touch = false); + virtual bool expand_by(size_t bytes); virtual bool shrink_by(size_t bytes); virtual size_t expand_into(PSVirtualSpace* space, size_t bytes); void release(); @@ -127,7 +127,7 @@ PSVirtualSpaceHighToLow(ReservedSpace rs, size_t alignment); PSVirtualSpaceHighToLow(ReservedSpace rs); - virtual bool expand_by(size_t bytes, bool pre_touch = false); + virtual bool expand_by(size_t bytes); virtual bool shrink_by(size_t bytes); virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -64,12 +64,12 @@ } if (UseNUMA) { - _eden_space = new MutableNUMASpace(); + _eden_space = new MutableNUMASpace(virtual_space()->alignment()); } else { - _eden_space = new MutableSpace(); + _eden_space = new MutableSpace(virtual_space()->alignment()); } - _from_space = new MutableSpace(); - _to_space = new MutableSpace(); + _from_space = new MutableSpace(virtual_space()->alignment()); + _to_space = new MutableSpace(virtual_space()->alignment()); if (_eden_space == NULL || _from_space == NULL || _to_space == NULL) { vm_exit_during_initialization("Could not allocate a young gen space");
--- a/src/share/vm/gc_implementation/shared/ageTable.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/shared/ageTable.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -67,6 +67,12 @@ } } +void ageTable::merge_par(ageTable* subTable) { + for (int i = 0; i < table_size; i++) { + Atomic::add_ptr(subTable->sizes[i], &sizes[i]); + } +} + int ageTable::compute_tenuring_threshold(size_t survivor_capacity) { size_t desired_survivor_size = (size_t)((((double) survivor_capacity)*TargetSurvivorRatio)/100); size_t total = 0;
--- a/src/share/vm/gc_implementation/shared/ageTable.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/shared/ageTable.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -56,6 +56,7 @@ // Merge another age table with the current one. Used // for parallel young generation gc. void merge(ageTable* subTable); + void merge_par(ageTable* subTable); // calculate new tenuring threshold based on age information int compute_tenuring_threshold(size_t survivor_capacity);
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -27,7 +27,7 @@ # include "incls/_mutableNUMASpace.cpp.incl" -MutableNUMASpace::MutableNUMASpace() { +MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment) { _lgrp_spaces = new (ResourceObj::C_HEAP) GrowableArray<LGRPSpace*>(0, true); _page_size = os::vm_page_size(); _adaptation_cycles = 0; @@ -221,7 +221,7 @@ } } if (!found) { - lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i])); + lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i], alignment())); } } @@ -443,10 +443,10 @@ // Is there bottom? if (new_region.start() < intersection.start()) { // Yes // Try to coalesce small pages into a large one. - if (UseLargePages && page_size() >= os::large_page_size()) { - HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), os::large_page_size()); + if (UseLargePages && page_size() >= alignment()) { + HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), alignment()); if (new_region.contains(p) - && pointer_delta(p, new_region.start(), sizeof(char)) >= os::large_page_size()) { + && pointer_delta(p, new_region.start(), sizeof(char)) >= alignment()) { if (intersection.contains(p)) { intersection = MemRegion(p, intersection.end()); } else { @@ -462,10 +462,10 @@ // Is there top? if (intersection.end() < new_region.end()) { // Yes // Try to coalesce small pages into a large one. - if (UseLargePages && page_size() >= os::large_page_size()) { - HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), os::large_page_size()); + if (UseLargePages && page_size() >= alignment()) { + HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), alignment()); if (new_region.contains(p) - && pointer_delta(new_region.end(), p, sizeof(char)) >= os::large_page_size()) { + && pointer_delta(new_region.end(), p, sizeof(char)) >= alignment()) { if (intersection.contains(p)) { intersection = MemRegion(intersection.start(), p); } else { @@ -504,12 +504,12 @@ // That's the only case we have to make an additional bias_region() call. HeapWord* start = invalid_region->start(); HeapWord* end = invalid_region->end(); - if (UseLargePages && page_size() >= os::large_page_size()) { - HeapWord *p = (HeapWord*)round_down((intptr_t) start, os::large_page_size()); + if (UseLargePages && page_size() >= alignment()) { + HeapWord *p = (HeapWord*)round_down((intptr_t) start, alignment()); if (new_region.contains(p)) { start = p; } - p = (HeapWord*)round_to((intptr_t) end, os::large_page_size()); + p = (HeapWord*)round_to((intptr_t) end, alignment()); if (new_region.contains(end)) { end = p; } @@ -526,7 +526,8 @@ void MutableNUMASpace::initialize(MemRegion mr, bool clear_space, - bool mangle_space) { + bool mangle_space, + bool setup_pages) { assert(clear_space, "Reallocation will destory data!"); assert(lgrp_spaces()->length() > 0, "There should be at least one space"); @@ -538,7 +539,7 @@ // Compute chunk sizes size_t prev_page_size = page_size(); - set_page_size(UseLargePages ? os::large_page_size() : os::vm_page_size()); + set_page_size(UseLargePages ? alignment() : os::vm_page_size()); HeapWord* rounded_bottom = (HeapWord*)round_to((intptr_t) bottom(), page_size()); HeapWord* rounded_end = (HeapWord*)round_down((intptr_t) end(), page_size()); size_t base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size(); @@ -666,7 +667,7 @@ } // Clear space (set top = bottom) but never mangle. - s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle); + s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle, MutableSpace::DontSetupPages); set_adaptation_cycles(samples_count()); }
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -82,8 +82,8 @@ char* last_page_scanned() { return _last_page_scanned; } void set_last_page_scanned(char* p) { _last_page_scanned = p; } public: - LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) { - _space = new MutableSpace(); + LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) { + _space = new MutableSpace(alignment); _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight); } ~LGRPSpace() { @@ -183,10 +183,10 @@ public: GrowableArray<LGRPSpace*>* lgrp_spaces() const { return _lgrp_spaces; } - MutableNUMASpace(); + MutableNUMASpace(size_t alignment); virtual ~MutableNUMASpace(); // Space initialization. - virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space); + virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages); // Update space layout if necessary. Do all adaptive resizing job. virtual void update(); // Update allocation rate averages.
--- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -25,7 +25,10 @@ # include "incls/_precompiled.incl" # include "incls/_mutableSpace.cpp.incl" -MutableSpace::MutableSpace(): ImmutableSpace(), _top(NULL) { +MutableSpace::MutableSpace(size_t alignment): ImmutableSpace(), _top(NULL), _alignment(alignment) { + assert(MutableSpace::alignment() >= 0 && + MutableSpace::alignment() % os::vm_page_size() == 0, + "Space should be aligned"); _mangler = new MutableSpaceMangler(this); } @@ -33,16 +36,88 @@ delete _mangler; } +void MutableSpace::numa_setup_pages(MemRegion mr, bool clear_space) { + if (!mr.is_empty()) { + size_t page_size = UseLargePages ? alignment() : os::vm_page_size(); + HeapWord *start = (HeapWord*)round_to((intptr_t) mr.start(), page_size); + HeapWord *end = (HeapWord*)round_down((intptr_t) mr.end(), page_size); + if (end > start) { + size_t size = pointer_delta(end, start, sizeof(char)); + if (clear_space) { + // Prefer page reallocation to migration. + os::free_memory((char*)start, size); + } + os::numa_make_global((char*)start, size); + } + } +} + +void MutableSpace::pretouch_pages(MemRegion mr) { + for (volatile char *p = (char*)mr.start(); p < (char*)mr.end(); p += os::vm_page_size()) { + char t = *p; *p = t; + } +} + void MutableSpace::initialize(MemRegion mr, bool clear_space, - bool mangle_space) { - HeapWord* bottom = mr.start(); - HeapWord* end = mr.end(); + bool mangle_space, + bool setup_pages) { + + assert(Universe::on_page_boundary(mr.start()) && Universe::on_page_boundary(mr.end()), + "invalid space boundaries"); - assert(Universe::on_page_boundary(bottom) && Universe::on_page_boundary(end), - "invalid space boundaries"); - set_bottom(bottom); - set_end(end); + if (setup_pages && (UseNUMA || AlwaysPreTouch)) { + // The space may move left and right or expand/shrink. + // We'd like to enforce the desired page placement. + MemRegion head, tail; + if (last_setup_region().is_empty()) { + // If it's the first initialization don't limit the amount of work. + head = mr; + tail = MemRegion(mr.end(), mr.end()); + } else { + // Is there an intersection with the address space? + MemRegion intersection = last_setup_region().intersection(mr); + if (intersection.is_empty()) { + intersection = MemRegion(mr.end(), mr.end()); + } + // All the sizes below are in words. + size_t head_size = 0, tail_size = 0; + if (mr.start() <= intersection.start()) { + head_size = pointer_delta(intersection.start(), mr.start()); + } + if(intersection.end() <= mr.end()) { + tail_size = pointer_delta(mr.end(), intersection.end()); + } + // Limit the amount of page manipulation if necessary. + if (NUMASpaceResizeRate > 0 && !AlwaysPreTouch) { + const size_t change_size = head_size + tail_size; + const float setup_rate_words = NUMASpaceResizeRate >> LogBytesPerWord; + head_size = MIN2((size_t)(setup_rate_words * head_size / change_size), + head_size); + tail_size = MIN2((size_t)(setup_rate_words * tail_size / change_size), + tail_size); + } + head = MemRegion(intersection.start() - head_size, intersection.start()); + tail = MemRegion(intersection.end(), intersection.end() + tail_size); + } + assert(mr.contains(head) && mr.contains(tail), "Sanity"); + + if (UseNUMA) { + numa_setup_pages(head, clear_space); + numa_setup_pages(tail, clear_space); + } + + if (AlwaysPreTouch) { + pretouch_pages(head); + pretouch_pages(tail); + } + + // Remember where we stopped so that we can continue later. + set_last_setup_region(MemRegion(head.start(), tail.end())); + } + + set_bottom(mr.start()); + set_end(mr.end()); if (clear_space) { clear(mangle_space);
--- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -25,7 +25,10 @@ // A MutableSpace is a subtype of ImmutableSpace that supports the // concept of allocation. This includes the concepts that a space may // be only partially full, and the querry methods that go with such -// an assumption. +// an assumption. MutableSpace is also responsible for minimizing the +// page allocation time by having the memory pretouched (with +// AlwaysPretouch) and for optimizing page placement on NUMA systems +// by make the underlying region interleaved (with UseNUMA). // // Invariant: (ImmutableSpace +) bottom() <= top() <= end() // top() is inclusive and end() is exclusive. @@ -37,15 +40,23 @@ // Helper for mangling unused space in debug builds MutableSpaceMangler* _mangler; - + // The last region which page had been setup to be interleaved. + MemRegion _last_setup_region; + size_t _alignment; protected: HeapWord* _top; MutableSpaceMangler* mangler() { return _mangler; } + void numa_setup_pages(MemRegion mr, bool clear_space); + void pretouch_pages(MemRegion mr); + + void set_last_setup_region(MemRegion mr) { _last_setup_region = mr; } + MemRegion last_setup_region() const { return _last_setup_region; } + public: virtual ~MutableSpace(); - MutableSpace(); + MutableSpace(size_t page_size); // Accessors HeapWord* top() const { return _top; } @@ -57,13 +68,20 @@ virtual void set_bottom(HeapWord* value) { _bottom = value; } virtual void set_end(HeapWord* value) { _end = value; } + size_t alignment() { return _alignment; } + // Returns a subregion containing all objects in this space. MemRegion used_region() { return MemRegion(bottom(), top()); } + static const bool SetupPages = true; + static const bool DontSetupPages = false; + // Initialization virtual void initialize(MemRegion mr, bool clear_space, - bool mangle_space); + bool mangle_space, + bool setup_pages = SetupPages); + virtual void clear(bool mangle_space); // Does the usual initialization but optionally resets top to bottom. #if 0 // MANGLE_SPACE
--- a/src/share/vm/gc_interface/collectedHeap.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -42,6 +42,7 @@ class CollectedHeap : public CHeapObj { friend class VMStructs; friend class IsGCActiveMark; // Block structured external access to _is_gc_active + friend class constantPoolCacheKlass; // allocate() method inserts is_conc_safe #ifdef ASSERT static int _fire_out_of_memory_count; @@ -82,8 +83,6 @@ // Reinitialize tlabs before resuming mutators. virtual void resize_all_tlabs(); - debug_only(static void check_for_valid_allocation_state();) - protected: // Allocate from the current thread's TLAB, with broken-out slow path. inline static HeapWord* allocate_from_tlab(Thread* thread, size_t size); @@ -142,6 +141,7 @@ PRODUCT_RETURN; virtual void check_for_non_bad_heap_word_value(HeapWord* addr, size_t size) PRODUCT_RETURN; + debug_only(static void check_for_valid_allocation_state();) public: enum Name {
--- a/src/share/vm/interpreter/rewriter.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/interpreter/rewriter.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -48,9 +48,14 @@ // Creates a constant pool cache given an inverse_index_map +// This creates the constant pool cache initially in a state +// that is unsafe for concurrent GC processing but sets it to +// a safe mode before the constant pool cache is returned. constantPoolCacheHandle Rewriter::new_constant_pool_cache(intArray& inverse_index_map, TRAPS) { const int length = inverse_index_map.length(); - constantPoolCacheOop cache = oopFactory::new_constantPoolCache(length, CHECK_(constantPoolCacheHandle())); + constantPoolCacheOop cache = oopFactory::new_constantPoolCache(length, + methodOopDesc::IsUnsafeConc, + CHECK_(constantPoolCacheHandle())); cache->initialize(inverse_index_map); return constantPoolCacheHandle(THREAD, cache); }
--- a/src/share/vm/memory/genCollectedHeap.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/memory/genCollectedHeap.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -610,6 +610,10 @@ Universe::print_heap_after_gc(); } +#ifdef TRACESPINNING + ParallelTaskTerminator::print_termination_counts(); +#endif + if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) { tty->print_cr("Stopping after GC #%d", ExitAfterGCNum); vm_exit(-1);
--- a/src/share/vm/memory/oopFactory.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/memory/oopFactory.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -90,9 +90,11 @@ } -constantPoolCacheOop oopFactory::new_constantPoolCache(int length, TRAPS) { +constantPoolCacheOop oopFactory::new_constantPoolCache(int length, + bool is_conc_safe, + TRAPS) { constantPoolCacheKlass* ck = constantPoolCacheKlass::cast(Universe::constantPoolCacheKlassObj()); - return ck->allocate(length, CHECK_NULL); + return ck->allocate(length, is_conc_safe, CHECK_NULL); }
--- a/src/share/vm/memory/oopFactory.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/memory/oopFactory.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -84,7 +84,9 @@ static constantPoolOop new_constantPool (int length, bool is_conc_safe, TRAPS); - static constantPoolCacheOop new_constantPoolCache(int length, TRAPS); + static constantPoolCacheOop new_constantPoolCache(int length, + bool is_conc_safe, + TRAPS); // Instance classes static klassOop new_instanceKlass(int vtable_len, int itable_len, int static_field_size,
--- a/src/share/vm/memory/referenceProcessor.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/memory/referenceProcessor.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -721,12 +721,6 @@ iter.obj(), iter.obj()->blueprint()->internal_name()); } assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference"); - // If discovery is concurrent, we may have objects with null referents, - // being those that were concurrently cleared after they were discovered - // (and not subsequently precleaned). - assert( (discovery_is_atomic() && iter.referent()->is_oop()) - || (!discovery_is_atomic() && iter.referent()->is_oop_or_null(UseConcMarkSweepGC)), - "Adding a bad referent"); iter.next(); } // Remember to keep sentinel pointer around
--- a/src/share/vm/oops/cpCacheKlass.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/oops/cpCacheKlass.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -32,13 +32,43 @@ } -constantPoolCacheOop constantPoolCacheKlass::allocate(int length, TRAPS) { +constantPoolCacheOop constantPoolCacheKlass::allocate(int length, + bool is_conc_safe, + TRAPS) { // allocate memory int size = constantPoolCacheOopDesc::object_size(length); + KlassHandle klass (THREAD, as_klassOop()); - constantPoolCacheOop cache = (constantPoolCacheOop) - CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL); + + // This is the original code. The code from permanent_obj_allocate() + // was in-lined to allow the setting of is_conc_safe before the klass + // is installed. + // constantPoolCacheOop cache = (constantPoolCacheOop) + // CollectedHeap::permanent_obj_allocate(klass, size, CHECK_NULL); + + oop obj = CollectedHeap::permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL); + constantPoolCacheOop cache = (constantPoolCacheOop) obj; + cache->set_is_conc_safe(is_conc_safe); + // The store to is_conc_safe must be visible before the klass + // is set. This should be done safely because _is_conc_safe has + // been declared volatile. If there are any problems, consider adding + // OrderAccess::storestore(); + CollectedHeap::post_allocation_install_obj_klass(klass, obj, size); + NOT_PRODUCT(Universe::heap()->check_for_bad_heap_word_value((HeapWord*) obj, + size)); + + // The length field affects the size of the object. The allocation + // above allocates the correct size (see calculation of "size") but + // the size() method of the constant pool cache oop will not reflect + // that size until the correct length is set. cache->set_length(length); + + // The store of the length must be visible before is_conc_safe is + // set to a safe state. + // This should be done safely because _is_conc_safe has + // been declared volatile. If there are any problems, consider adding + // OrderAccess::storestore(); + cache->set_is_conc_safe(methodOopDesc::IsSafeConc); cache->set_constant_pool(NULL); return cache; } @@ -114,7 +144,6 @@ return size; } - int constantPoolCacheKlass::oop_adjust_pointers(oop obj) { assert(obj->is_constantPoolCache(), "obj must be constant pool cache"); constantPoolCacheOop cache = (constantPoolCacheOop)obj; @@ -131,6 +160,11 @@ return size; } +bool constantPoolCacheKlass::oop_is_conc_safe(oop obj) const { + assert(obj->is_constantPoolCache(), "should be constant pool"); + return constantPoolCacheOop(obj)->is_conc_safe(); +} + #ifndef SERIALGC void constantPoolCacheKlass::oop_copy_contents(PSPromotionManager* pm, oop obj) {
--- a/src/share/vm/oops/cpCacheKlass.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/oops/cpCacheKlass.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -32,7 +32,7 @@ // Allocation DEFINE_ALLOCATE_PERMANENT(constantPoolCacheKlass); - constantPoolCacheOop allocate(int length, TRAPS); + constantPoolCacheOop allocate(int length, bool is_conc_safe, TRAPS); static klassOop create_klass(TRAPS); // Casting from klassOop @@ -48,6 +48,7 @@ // Garbage collection void oop_follow_contents(oop obj); int oop_adjust_pointers(oop obj); + virtual bool oop_is_conc_safe(oop obj) const; // Parallel Scavenge and Parallel Old PARALLEL_GC_DECLS
--- a/src/share/vm/oops/cpCacheOop.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/oops/cpCacheOop.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -291,6 +291,9 @@ private: int _length; constantPoolOop _constant_pool; // the corresponding constant pool + // If true, safe for concurrent GC processing, + // Set unconditionally in constantPoolCacheKlass::allocate() + volatile bool _is_conc_safe; // Sizing debug_only(friend class ClassVerifier;) @@ -316,6 +319,12 @@ constantPoolOop constant_pool() const { return _constant_pool; } ConstantPoolCacheEntry* entry_at(int i) const { assert(0 <= i && i < length(), "index out of bounds"); return base() + i; } + // GC support + // If the _length field has not been set, the size of the + // constantPoolCache cannot be correctly calculated. + bool is_conc_safe() { return _is_conc_safe; } + void set_is_conc_safe(bool v) { _is_conc_safe = v; } + // Code generation static ByteSize base_offset() { return in_ByteSize(sizeof(constantPoolCacheOopDesc)); }
--- a/src/share/vm/opto/graphKit.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/opto/graphKit.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -3233,12 +3233,11 @@ // Now some of the values - Node* marking = __ load(no_ctrl, marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); - Node* index = __ load(no_ctrl, index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); - Node* buffer = __ load(no_ctrl, buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); // if (!marking) __ if_then(marking, BoolTest::ne, zero); { + Node* index = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw); const Type* t1 = adr->bottom_type(); const Type* t2 = val->bottom_type(); @@ -3246,6 +3245,7 @@ Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx); // if (orig != NULL) __ if_then(orig, BoolTest::ne, null()); { + Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); // load original value // alias_idx correct??
--- a/src/share/vm/opto/memnode.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/opto/memnode.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -1303,6 +1303,7 @@ Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore); if (base != NULL && phase->type(base)->higher_equal(TypePtr::NOTNULL) + && phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw && all_controls_dominate(base, phase->C->start())) { // A method-invariant, non-null address (constant or 'this' argument). set_req(MemNode::Control, NULL);
--- a/src/share/vm/runtime/globals.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/runtime/globals.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -1307,7 +1307,14 @@ product(intx, ParGCArrayScanChunk, 50, \ "Scan a subset and push remainder, if array is bigger than this") \ \ - product(intx, ParGCDesiredObjsFromOverflowList, 20, \ + notproduct(bool, ParGCWorkQueueOverflowALot, false, \ + "Whether we should simulate work queue overflow in ParNew") \ + \ + notproduct(uintx, ParGCWorkQueueOverflowInterval, 1000, \ + "An `interval' counter that determines how frequently" \ + " we simulate overflow; a smaller number increases frequency") \ + \ + product(uintx, ParGCDesiredObjsFromOverflowList, 20, \ "The desired number of objects to claim from the overflow list") \ \ product(uintx, CMSParPromoteBlocksToClaim, 50, \ @@ -1419,18 +1426,18 @@ develop(bool, CMSOverflowEarlyRestoration, false, \ "Whether preserved marks should be restored early") \ \ - product(uintx, CMSMarkStackSize, 32*K, \ + product(uintx, CMSMarkStackSize, NOT_LP64(32*K) LP64_ONLY(4*M), \ "Size of CMS marking stack") \ \ - product(uintx, CMSMarkStackSizeMax, 4*M, \ + product(uintx, CMSMarkStackSizeMax, NOT_LP64(4*M) LP64_ONLY(512*M), \ "Max size of CMS marking stack") \ \ notproduct(bool, CMSMarkStackOverflowALot, false, \ "Whether we should simulate frequent marking stack / work queue" \ " overflow") \ \ - notproduct(intx, CMSMarkStackOverflowInterval, 1000, \ - "A per-thread `interval' counter that determines how frequently" \ + notproduct(uintx, CMSMarkStackOverflowInterval, 1000, \ + "An `interval' counter that determines how frequently" \ " we simulate overflow; a smaller number increases frequency") \ \ product(uintx, CMSMaxAbortablePrecleanLoops, 0, \ @@ -1648,7 +1655,14 @@ develop(uintx, WorkStealingYieldsBeforeSleep, 1000, \ "Number of yields before a sleep is done during workstealing") \ \ - product(uintx, PreserveMarkStackSize, 40, \ + develop(uintx, WorkStealingHardSpins, 4096, \ + "Number of iterations in a spin loop between checks on " \ + "time out of hard spin") \ + \ + develop(uintx, WorkStealingSpinToYieldRatio, 10, \ + "Ratio of hard spins to calls to yield") \ + \ + product(uintx, PreserveMarkStackSize, 1024, \ "Size for stack used in promotion failure handling") \ \ product_pd(bool, UseTLAB, "Use thread-local object allocation") \
--- a/src/share/vm/runtime/os.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/runtime/os.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -74,13 +74,11 @@ const int milliseconds_after_second = milliseconds_since_19700101 % milliseconds_per_microsecond; // Convert the time value to a tm and timezone variable - const struct tm *time_struct_temp = localtime(&seconds_since_19700101); - if (time_struct_temp == NULL) { - assert(false, "Failed localtime"); + struct tm time_struct; + if (localtime_pd(&seconds_since_19700101, &time_struct) == NULL) { + assert(false, "Failed localtime_pd"); return NULL; } - // Save the results of localtime - const struct tm time_struct = *time_struct_temp; const time_t zone = timezone; // If daylight savings time is in effect, @@ -93,10 +91,10 @@ UTC_to_local = UTC_to_local - seconds_per_hour; } // Compute the time zone offset. - // localtime(3C) sets timezone to the difference (in seconds) + // localtime_pd() sets timezone to the difference (in seconds) // between UTC and and local time. // ISO 8601 says we need the difference between local time and UTC, - // we change the sign of the localtime(3C) result. + // we change the sign of the localtime_pd() result. const time_t local_to_UTC = -(UTC_to_local); // Then we have to figure out if if we are ahead (+) or behind (-) UTC. char sign_local_to_UTC = '+';
--- a/src/share/vm/runtime/os.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/runtime/os.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -120,7 +120,8 @@ // Return current local time in a string (YYYY-MM-DD HH:MM:SS). // It is MT safe, but not async-safe, as reading time zone // information may require a lock on some platforms. - static char* local_time_string(char *buf, size_t buflen); + static char* local_time_string(char *buf, size_t buflen); + static struct tm* localtime_pd (const time_t* clock, struct tm* res); // Fill in buffer with current local time as an ISO-8601 string. // E.g., YYYY-MM-DDThh:mm:ss.mmm+zzzz. // Returns buffer, or NULL if it failed.
--- a/src/share/vm/utilities/taskqueue.cpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/utilities/taskqueue.cpp Sun Feb 15 20:09:02 2009 -0800 @@ -25,6 +25,12 @@ # include "incls/_precompiled.incl" # include "incls/_taskqueue.cpp.incl" +#ifdef TRACESPINNING +uint ParallelTaskTerminator::_total_yields = 0; +uint ParallelTaskTerminator::_total_spins = 0; +uint ParallelTaskTerminator::_total_peeks = 0; +#endif + bool TaskQueueSuper::peek() { return _bottom != _age.top(); } @@ -69,15 +75,62 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { Atomic::inc(&_offered_termination); - juint yield_count = 0; + uint yield_count = 0; + // Number of hard spin loops done since last yield + uint hard_spin_count = 0; + // Number of iterations in the hard spin loop. + uint hard_spin_limit = WorkStealingHardSpins; + + // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done. + // If it is greater than 0, then start with a small number + // of spins and increase number with each turn at spinning until + // the count of hard spins exceeds WorkStealingSpinToYieldRatio. + // Then do a yield() call and start spinning afresh. + if (WorkStealingSpinToYieldRatio > 0) { + hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio; + hard_spin_limit = MAX2(hard_spin_limit, 1U); + } + // Remember the initial spin limit. + uint hard_spin_start = hard_spin_limit; + + // Loop waiting for all threads to offer termination or + // more work. while (true) { + // Are all threads offering termination? if (_offered_termination == _n_threads) { - //inner_termination_loop(); return true; } else { + // Look for more work. + // Periodically sleep() instead of yield() to give threads + // waiting on the cores the chance to grab this code if (yield_count <= WorkStealingYieldsBeforeSleep) { + // Do a yield or hardspin. For purposes of deciding whether + // to sleep, count this as a yield. yield_count++; - yield(); + + // Periodically call yield() instead spinning + // After WorkStealingSpinToYieldRatio spins, do a yield() call + // and reset the counts and starting limit. + if (hard_spin_count > WorkStealingSpinToYieldRatio) { + yield(); + hard_spin_count = 0; + hard_spin_limit = hard_spin_start; +#ifdef TRACESPINNING + _total_yields++; +#endif + } else { + // Hard spin this time + // Increase the hard spinning period but only up to a limit. + hard_spin_limit = MIN2(2*hard_spin_limit, + (uint) WorkStealingHardSpins); + for (uint j = 0; j < hard_spin_limit; j++) { + SpinPause(); + } + hard_spin_count++; +#ifdef TRACESPINNING + _total_spins++; +#endif + } } else { if (PrintGCDetails && Verbose) { gclog_or_tty->print_cr("ParallelTaskTerminator::offer_termination() " @@ -92,6 +145,9 @@ sleep(WorkStealingSleepMillis); } +#ifdef TRACESPINNING + _total_peeks++; +#endif if (peek_in_queue_set() || (terminator != NULL && terminator->should_exit_termination())) { Atomic::dec(&_offered_termination); @@ -101,6 +157,16 @@ } } +#ifdef TRACESPINNING +void ParallelTaskTerminator::print_termination_counts() { + gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %lld " + "Total spins: %lld Total peeks: %lld", + total_yields(), + total_spins(), + total_peeks()); +} +#endif + void ParallelTaskTerminator::reset_for_reuse() { if (_offered_termination != 0) { assert(_offered_termination == _n_threads,
--- a/src/share/vm/utilities/taskqueue.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/utilities/taskqueue.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -22,67 +22,76 @@ * */ +#ifdef LP64 +typedef juint TAG_TYPE; +// for a taskqueue size of 4M +#define LOG_TASKQ_SIZE 22 +#else +typedef jushort TAG_TYPE; +// for a taskqueue size of 16K +#define LOG_TASKQ_SIZE 14 +#endif + class TaskQueueSuper: public CHeapObj { protected: // The first free element after the last one pushed (mod _n). - // (For now we'll assume only 32-bit CAS). - volatile juint _bottom; + volatile uint _bottom; // log2 of the size of the queue. enum SomeProtectedConstants { - Log_n = 14 + Log_n = LOG_TASKQ_SIZE }; +#undef LOG_TASKQ_SIZE // Size of the queue. - juint n() { return (1 << Log_n); } + uint n() { return (1 << Log_n); } // For computing "x mod n" efficiently. - juint n_mod_mask() { return n() - 1; } + uint n_mod_mask() { return n() - 1; } struct Age { - jushort _top; - jushort _tag; + TAG_TYPE _top; + TAG_TYPE _tag; - jushort tag() const { return _tag; } - jushort top() const { return _top; } + TAG_TYPE tag() const { return _tag; } + TAG_TYPE top() const { return _top; } Age() { _tag = 0; _top = 0; } friend bool operator ==(const Age& a1, const Age& a2) { return a1.tag() == a2.tag() && a1.top() == a2.top(); } - }; Age _age; // These make sure we do single atomic reads and writes. Age get_age() { - jint res = *(volatile jint*)(&_age); + uint res = *(volatile uint*)(&_age); return *(Age*)(&res); } void set_age(Age a) { - *(volatile jint*)(&_age) = *(int*)(&a); + *(volatile uint*)(&_age) = *(uint*)(&a); } - jushort get_top() { + TAG_TYPE get_top() { return get_age().top(); } // These both operate mod _n. - juint increment_index(juint ind) { + uint increment_index(uint ind) { return (ind + 1) & n_mod_mask(); } - juint decrement_index(juint ind) { + uint decrement_index(uint ind) { return (ind - 1) & n_mod_mask(); } // Returns a number in the range [0.._n). If the result is "n-1", it // should be interpreted as 0. - juint dirty_size(juint bot, juint top) { - return ((jint)bot - (jint)top) & n_mod_mask(); + uint dirty_size(uint bot, uint top) { + return ((int)bot - (int)top) & n_mod_mask(); } // Returns the size corresponding to the given "bot" and "top". - juint size(juint bot, juint top) { - juint sz = dirty_size(bot, top); + uint size(uint bot, uint top) { + uint sz = dirty_size(bot, top); // Has the queue "wrapped", so that bottom is less than top? // There's a complicated special case here. A pair of threads could // perform pop_local and pop_global operations concurrently, starting @@ -94,7 +103,7 @@ // owner performs pop_local's, and several concurrent threads // attempting to perform the pop_global will all perform the same CAS, // and only one can succeed. Any stealing thread that reads after - // either the increment or decrement will seen an empty queue, and will + // either the increment or decrement will see an empty queue, and will // not join the competitors. The "sz == -1 || sz == _n-1" state will // not be modified by concurrent queues, so the owner thread can reset // the state to _bottom == top so subsequent pushes will be performed @@ -112,11 +121,11 @@ // Return an estimate of the number of elements in the queue. // The "careful" version admits the possibility of pop_local/pop_global // races. - juint size() { + uint size() { return size(_bottom, get_top()); } - juint dirty_size() { + uint dirty_size() { return dirty_size(_bottom, get_top()); } @@ -127,15 +136,15 @@ // Maximum number of elements allowed in the queue. This is two less // than the actual queue size, for somewhat complicated reasons. - juint max_elems() { return n() - 2; } + uint max_elems() { return n() - 2; } }; template<class E> class GenericTaskQueue: public TaskQueueSuper { private: // Slow paths for push, pop_local. (pop_global has no fast path.) - bool push_slow(E t, juint dirty_n_elems); - bool pop_local_slow(juint localBot, Age oldAge); + bool push_slow(E t, uint dirty_n_elems); + bool pop_local_slow(uint localBot, Age oldAge); public: // Initializes the queue to empty. @@ -170,7 +179,7 @@ template<class E> GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { - assert(sizeof(Age) == sizeof(jint), "Depends on this."); + assert(sizeof(Age) == sizeof(int), "Depends on this."); } template<class E> @@ -182,9 +191,9 @@ template<class E> void GenericTaskQueue<E>::oops_do(OopClosure* f) { // tty->print_cr("START OopTaskQueue::oops_do"); - int iters = size(); - juint index = _bottom; - for (int i = 0; i < iters; ++i) { + uint iters = size(); + uint index = _bottom; + for (uint i = 0; i < iters; ++i) { index = decrement_index(index); // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, // index, &_elems[index], _elems[index]); @@ -198,10 +207,10 @@ template<class E> -bool GenericTaskQueue<E>::push_slow(E t, juint dirty_n_elems) { +bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { if (dirty_n_elems == n() - 1) { // Actually means 0, so do the push. - juint localBot = _bottom; + uint localBot = _bottom; _elems[localBot] = t; _bottom = increment_index(localBot); return true; @@ -211,7 +220,7 @@ template<class E> bool GenericTaskQueue<E>:: -pop_local_slow(juint localBot, Age oldAge) { +pop_local_slow(uint localBot, Age oldAge) { // This queue was observed to contain exactly one element; either this // thread will claim it, or a competing "pop_global". In either case, // the queue will be logically empty afterwards. Create a new Age value @@ -230,9 +239,8 @@ Age tempAge; // No competing pop_global has yet incremented "top"; we'll try to // install new_age, thus claiming the element. - assert(sizeof(Age) == sizeof(jint) && sizeof(jint) == sizeof(juint), - "Assumption about CAS unit."); - *(jint*)&tempAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge); + assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit."); + *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); if (tempAge == oldAge) { // We win. assert(dirty_size(localBot, get_top()) != n() - 1, @@ -253,8 +261,8 @@ bool GenericTaskQueue<E>::pop_global(E& t) { Age newAge; Age oldAge = get_age(); - juint localBot = _bottom; - juint n_elems = size(localBot, oldAge.top()); + uint localBot = _bottom; + uint n_elems = size(localBot, oldAge.top()); if (n_elems == 0) { return false; } @@ -263,7 +271,7 @@ newAge._top = increment_index(newAge.top()); if ( newAge._top == 0 ) newAge._tag++; Age resAge; - *(jint*)&resAge = Atomic::cmpxchg(*(jint*)&newAge, (volatile jint*)&_age, *(jint*)&oldAge); + *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); // Note that using "_bottom" here might fail, since a pop_local might // have decremented it. assert(dirty_size(localBot, newAge._top) != n() - 1, @@ -287,7 +295,7 @@ template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper { private: - int _n; + uint _n; GenericTaskQueue<E>** _queues; public: @@ -300,51 +308,51 @@ } } - bool steal_1_random(int queue_num, int* seed, E& t); - bool steal_best_of_2(int queue_num, int* seed, E& t); - bool steal_best_of_all(int queue_num, int* seed, E& t); + bool steal_1_random(uint queue_num, int* seed, E& t); + bool steal_best_of_2(uint queue_num, int* seed, E& t); + bool steal_best_of_all(uint queue_num, int* seed, E& t); - void register_queue(int i, GenericTaskQueue<E>* q); + void register_queue(uint i, GenericTaskQueue<E>* q); - GenericTaskQueue<E>* queue(int n); + GenericTaskQueue<E>* queue(uint n); // The thread with queue number "queue_num" (and whose random number seed // is at "seed") is trying to steal a task from some other queue. (It // may try several queues, according to some configuration parameter.) // If some steal succeeds, returns "true" and sets "t" the stolen task, // otherwise returns false. - bool steal(int queue_num, int* seed, E& t); + bool steal(uint queue_num, int* seed, E& t); bool peek(); }; template<class E> -void GenericTaskQueueSet<E>::register_queue(int i, GenericTaskQueue<E>* q) { - assert(0 <= i && i < _n, "index out of range."); +void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) { + assert(i < _n, "index out of range."); _queues[i] = q; } template<class E> -GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(int i) { +GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) { return _queues[i]; } template<class E> -bool GenericTaskQueueSet<E>::steal(int queue_num, int* seed, E& t) { - for (int i = 0; i < 2 * _n; i++) +bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) { + for (uint i = 0; i < 2 * _n; i++) if (steal_best_of_2(queue_num, seed, t)) return true; return false; } template<class E> -bool GenericTaskQueueSet<E>::steal_best_of_all(int queue_num, int* seed, E& t) { +bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) { if (_n > 2) { int best_k; - jint best_sz = 0; - for (int k = 0; k < _n; k++) { + uint best_sz = 0; + for (uint k = 0; k < _n; k++) { if (k == queue_num) continue; - jint sz = _queues[k]->size(); + uint sz = _queues[k]->size(); if (sz > best_sz) { best_sz = sz; best_k = k; @@ -362,9 +370,9 @@ } template<class E> -bool GenericTaskQueueSet<E>::steal_1_random(int queue_num, int* seed, E& t) { +bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) { if (_n > 2) { - int k = queue_num; + uint k = queue_num; while (k == queue_num) k = randomParkAndMiller(seed) % _n; return _queues[2]->pop_global(t); } else if (_n == 2) { @@ -378,20 +386,20 @@ } template<class E> -bool GenericTaskQueueSet<E>::steal_best_of_2(int queue_num, int* seed, E& t) { +bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) { if (_n > 2) { - int k1 = queue_num; + uint k1 = queue_num; while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n; - int k2 = queue_num; + uint k2 = queue_num; while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n; // Sample both and try the larger. - juint sz1 = _queues[k1]->size(); - juint sz2 = _queues[k2]->size(); + uint sz1 = _queues[k1]->size(); + uint sz2 = _queues[k2]->size(); if (sz2 > sz1) return _queues[k2]->pop_global(t); else return _queues[k1]->pop_global(t); } else if (_n == 2) { // Just try the other one. - int k = (queue_num + 1) % 2; + uint k = (queue_num + 1) % 2; return _queues[k]->pop_global(t); } else { assert(_n == 1, "can't be zero."); @@ -402,7 +410,7 @@ template<class E> bool GenericTaskQueueSet<E>::peek() { // Try all the queues. - for (int j = 0; j < _n; j++) { + for (uint j = 0; j < _n; j++) { if (_queues[j]->peek()) return true; } @@ -418,11 +426,19 @@ // A class to aid in the termination of a set of parallel tasks using // TaskQueueSet's for work stealing. +#undef TRACESPINNING + class ParallelTaskTerminator: public StackObj { private: int _n_threads; TaskQueueSetSuper* _queue_set; - jint _offered_termination; + int _offered_termination; + +#ifdef TRACESPINNING + static uint _total_yields; + static uint _total_spins; + static uint _total_peeks; +#endif bool peek_in_queue_set(); protected: @@ -454,13 +470,19 @@ // the terminator is finished. void reset_for_reuse(); +#ifdef TRACESPINNING + static uint total_yields() { return _total_yields; } + static uint total_spins() { return _total_spins; } + static uint total_peeks() { return _total_peeks; } + static void print_termination_counts(); +#endif }; #define SIMPLE_STACK 0 template<class E> inline bool GenericTaskQueue<E>::push(E t) { #if SIMPLE_STACK - juint localBot = _bottom; + uint localBot = _bottom; if (_bottom < max_elems()) { _elems[localBot] = t; _bottom = localBot + 1; @@ -469,10 +491,10 @@ return false; } #else - juint localBot = _bottom; + uint localBot = _bottom; assert((localBot >= 0) && (localBot < n()), "_bottom out of range."); - jushort top = get_top(); - juint dirty_n_elems = dirty_size(localBot, top); + TAG_TYPE top = get_top(); + uint dirty_n_elems = dirty_size(localBot, top); assert((dirty_n_elems >= 0) && (dirty_n_elems < n()), "n_elems out of range."); if (dirty_n_elems < max_elems()) { @@ -487,19 +509,19 @@ template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) { #if SIMPLE_STACK - juint localBot = _bottom; + uint localBot = _bottom; assert(localBot > 0, "precondition."); localBot--; t = _elems[localBot]; _bottom = localBot; return true; #else - juint localBot = _bottom; + uint localBot = _bottom; // This value cannot be n-1. That can only occur as a result of // the assignment to bottom in this method. If it does, this method // resets the size( to 0 before the next call (which is sequential, // since this is pop_local.) - juint dirty_n_elems = dirty_size(localBot, get_top()); + uint dirty_n_elems = dirty_size(localBot, get_top()); assert(dirty_n_elems != n() - 1, "Shouldn't be possible..."); if (dirty_n_elems == 0) return false; localBot = decrement_index(localBot); @@ -512,7 +534,7 @@ // If there's still at least one element in the queue, based on the // "_bottom" and "age" we've read, then there can be no interference with // a "pop_global" operation, and we're done. - juint tp = get_top(); + TAG_TYPE tp = get_top(); // XXX if (size(localBot, tp) > 0) { assert(dirty_size(localBot, tp) != n() - 1, "Shouldn't be possible..."); @@ -581,7 +603,7 @@ bool is_empty(); bool stealable_is_empty(); bool overflow_is_empty(); - juint stealable_size() { return _region_queue.size(); } + uint stealable_size() { return _region_queue.size(); } RegionTaskQueue* task_queue() { return &_region_queue; } };
--- a/src/share/vm/utilities/workgroup.hpp Mon Feb 09 13:47:26 2009 -0800 +++ b/src/share/vm/utilities/workgroup.hpp Sun Feb 15 20:09:02 2009 -0800 @@ -32,7 +32,7 @@ // An abstract task to be worked on by a gang. // You subclass this to supply your own work() method -class AbstractGangTask: public CHeapObj { +class AbstractGangTask VALUE_OBJ_CLASS_SPEC { public: // The abstract work method. // The argument tells you which member of the gang you are.