# HG changeset patch
# User trims
# Date 1238814774 25200
# Node ID fafab5d5349c7c066d677538db67a1ee0fb33bd2
# Parent  eae95c5579a4b64091ffe009d48c9c03ecf7627c
# Parent  922aedc96ef5da2a3aebe795277e5f0797edf62e
Merge

diff -r eae95c5579a4 -r fafab5d5349c src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Apr 03 19:54:45 2009 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Apr 03 20:12:54 2009 -0700
@@ -34,10 +34,12 @@
                                        Generation* old_gen_,
                                        int thread_num_,
                                        ObjToScanQueueSet* work_queue_set_,
+                                       GrowableArray<oop>** overflow_stack_set_,
                                        size_t desired_plab_sz_,
                                        ParallelTaskTerminator& term_) :
   _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
   _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
+  _overflow_stack(overflow_stack_set_[thread_num_]),
   _ageTable(false), // false ==> not the global age table, no perf data.
   _to_space_alloc_buffer(desired_plab_sz_),
   _to_space_closure(gen_, this), _old_gen_closure(gen_, this),
@@ -57,11 +59,6 @@
   _start = os::elapsedTime();
   _old_gen_closure.set_generation(old_gen_);
   _old_gen_root_closure.set_generation(old_gen_);
-  if (UseCompressedOops) {
-    _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true);
-  } else {
-    _overflow_stack = NULL;
-  }
 }
 #ifdef _MSC_VER
 #pragma warning( pop )
@@ -155,7 +152,7 @@
 }
 
 bool ParScanThreadState::take_from_overflow_stack() {
-  assert(UseCompressedOops, "Else should not call");
+  assert(ParGCUseLocalOverflow, "Else should not call");
   assert(young_gen()->overflow_list() == NULL, "Error");
   ObjToScanQueue* queue = work_queue();
   GrowableArray<oop>* of_stack = overflow_stack();
@@ -183,7 +180,7 @@
 }
 
 void ParScanThreadState::push_on_overflow_stack(oop p) {
-  assert(UseCompressedOops, "Else should not call");
+  assert(ParGCUseLocalOverflow, "Else should not call");
   overflow_stack()->push(p);
   assert(young_gen()->overflow_list() == NULL, "Error");
 }
@@ -260,6 +257,7 @@
                         ParNewGeneration&       gen,
                         Generation&             old_gen,
                         ObjToScanQueueSet&      queue_set,
+                        GrowableArray<oop>**    overflow_stacks_,
                         size_t                  desired_plab_sz,
                         ParallelTaskTerminator& term);
   inline ParScanThreadState& thread_sate(int i);
@@ -282,6 +280,7 @@
 ParScanThreadStateSet::ParScanThreadStateSet(
   int num_threads, Space& to_space, ParNewGeneration& gen,
   Generation& old_gen, ObjToScanQueueSet& queue_set,
+  GrowableArray<oop>** overflow_stack_set_,
   size_t desired_plab_sz, ParallelTaskTerminator& term)
   : ResourceArray(sizeof(ParScanThreadState), num_threads),
     _gen(gen), _next_gen(old_gen), _term(term),
@@ -292,7 +291,7 @@
   for (int i = 0; i < num_threads; ++i) {
     new ((ParScanThreadState*)_data + i)
         ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set,
-                           desired_plab_sz, term);
+                           overflow_stack_set_, desired_plab_sz, term);
   }
 }
 
@@ -519,6 +518,17 @@
   for (uint i2 = 0; i2 < ParallelGCThreads; i2++)
     _task_queues->queue(i2)->initialize();
 
+  _overflow_stacks = NEW_C_HEAP_ARRAY(GrowableArray<oop>*, ParallelGCThreads);
+  guarantee(_overflow_stacks != NULL, "Overflow stack set allocation failure");
+  for (uint i = 0; i < ParallelGCThreads; i++) {
+    if (ParGCUseLocalOverflow) {
+      _overflow_stacks[i] = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true);
+      guarantee(_overflow_stacks[i] != NULL, "Overflow Stack allocation failure.");
+    } else {
+      _overflow_stacks[i] = NULL;
+    }
+  }
+
   if (UsePerfData) {
     EXCEPTION_MARK;
     ResourceMark rm;
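The hunks above move overflow-stack ownership out of ParScanThreadState: instead of each thread conditionally allocating its own stack under UseCompressedOops, the generation now allocates one GrowableArray<oop>* slot per GC worker up front (NULL when ParGCUseLocalOverflow is off), and each thread picks up its slot by thread number. Below is a minimal standalone C++ sketch of that allocate-then-hand-off pattern; the names (Oop, WorkerState, num_workers) are illustrative stand-ins, not HotSpot's types.

    // Sketch of per-worker overflow-stack allocation, assuming illustrative names.
    #include <cstddef>
    #include <vector>

    using Oop = void*;                        // stand-in for HotSpot's oop
    static bool ParGCUseLocalOverflow = true; // stand-in for the -XX flag

    struct WorkerState {
        int thread_num;
        std::vector<Oop>* overflow_stack;     // NULL when the flag is off

        // Each worker indexes the shared slot array by its thread number,
        // mirroring _overflow_stack(overflow_stack_set_[thread_num_]) above.
        WorkerState(int n, std::vector<Oop>** stack_set)
            : thread_num(n), overflow_stack(stack_set[n]) {}
    };

    int main() {
        const size_t num_workers = 4;
        // One slot per worker, allocated unconditionally; the stacks themselves
        // exist only when the local-overflow mode is enabled.
        std::vector<std::vector<Oop>*> overflow_stacks(num_workers, nullptr);
        if (ParGCUseLocalOverflow) {
            for (size_t i = 0; i < num_workers; i++) {
                overflow_stacks[i] = new std::vector<Oop>();
                overflow_stacks[i]->reserve(512);  // mirrors the initial capacity above
            }
        }
        std::vector<WorkerState> workers;
        for (size_t i = 0; i < num_workers; i++) {
            workers.emplace_back((int)i, overflow_stacks.data());
        }
        for (auto* s : overflow_stacks) delete s;
        return 0;
    }

Allocating the slot array unconditionally keeps the constructor signature uniform across modes; the flag only decides whether the slots point at real stacks.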
@@ -784,7 +794,7 @@
   ParallelTaskTerminator _term(workers->total_workers(), task_queues());
   ParScanThreadStateSet thread_state_set(workers->total_workers(),
                                          *to(), *this, *_next_gen, *task_queues(),
-                                         desired_plab_sz(), _term);
+                                         _overflow_stacks, desired_plab_sz(), _term);
 
   ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set);
   int n_workers = workers->total_workers();
@@ -1238,11 +1248,12 @@
 #define BUSY (oop(0x1aff1aff))
 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
   assert(is_in_reserved(from_space_obj), "Should be from this generation");
-  if (UseCompressedOops) {
+  if (ParGCUseLocalOverflow) {
     // In the case of compressed oops, we use a private, not-shared
     // overflow stack.
     par_scan_state->push_on_overflow_stack(from_space_obj);
   } else {
+    assert(!UseCompressedOops, "Error");
     // if the object has been forwarded to itself, then we cannot
     // use the klass pointer for the linked list.  Instead we have
     // to allocate an oopDesc in the C-Heap and use that for the linked list.
@@ -1275,9 +1286,10 @@
 
 bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
   bool res;
-  if (UseCompressedOops) {
+  if (ParGCUseLocalOverflow) {
     res = par_scan_state->take_from_overflow_stack();
   } else {
+    assert(!UseCompressedOops, "Error");
     res = take_from_overflow_list_work(par_scan_state);
   }
   return res;
@@ -1305,6 +1317,7 @@
                       (size_t)ParGCDesiredObjsFromOverflowList);
 
   assert(par_scan_state->overflow_stack() == NULL, "Error");
+  assert(!UseCompressedOops, "Error");
   if (_overflow_list == NULL) return false;
 
   // Otherwise, there was something there; try claiming the list.
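Taken together, the push_on_overflow_list / take_from_overflow_list hunks make ParGCUseLocalOverflow choose between two spill paths: a private per-worker stack that needs no synchronization, or the legacy global list chained through object headers, which the new asserts forbid under compressed oops. A standalone C++ sketch of that dispatch follows; names are illustrative, and a plain vector stands in for the header-chained, CAS-managed global list in the real code.

    // Sketch of the dual-path overflow dispatch, assuming illustrative names.
    #include <cassert>
    #include <vector>

    using Oop = void*;
    static bool ParGCUseLocalOverflow = true;
    static bool UseCompressedOops     = true;

    static std::vector<Oop> global_overflow_list;   // shared fallback path

    struct Worker {
        std::vector<Oop> overflow_stack;            // private, no synchronization

        void push_on_overflow(Oop p) {
            if (ParGCUseLocalOverflow) {
                overflow_stack.push_back(p);        // thread-local, cheap
            } else {
                // Mirrors the patch's invariant: compressed oops must never
                // reach the global-list path.
                assert(!UseCompressedOops && "global list path is invalid here");
                global_overflow_list.push_back(p);  // real code chains via klass words
            }
        }

        bool take_from_overflow(Oop& out) {
            if (ParGCUseLocalOverflow) {
                if (overflow_stack.empty()) return false;
                out = overflow_stack.back();
                overflow_stack.pop_back();
                return true;
            }
            assert(!UseCompressedOops && "global list path is invalid here");
            if (global_overflow_list.empty()) return false;
            out = global_overflow_list.back();
            global_overflow_list.pop_back();
            return true;
        }
    };

    int main() {
        Worker w;
        int dummy;
        w.push_on_overflow(&dummy);   // spill one entry, then reclaim it
        Oop p;
        bool ok = w.take_from_overflow(p);
        return (ok && p == &dummy) ? 0 : 1;
    }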
diff -r eae95c5579a4 -r fafab5d5349c src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Apr 03 19:54:45 2009 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Apr 03 20:12:54 2009 -0700
@@ -33,8 +33,8 @@
 // but they must be here to allow ParScanClosure::do_oop_work to be defined
 // in genOopClosures.inline.hpp.
 
-typedef OopTaskQueue    ObjToScanQueue;
-typedef OopTaskQueueSet ObjToScanQueueSet;
+typedef OopTaskQueue       ObjToScanQueue;
+typedef OopTaskQueueSet    ObjToScanQueueSet;
 
 // Enable this to get push/pop/steal stats.
 const int PAR_STATS_ENABLED = 0;
@@ -116,7 +116,9 @@
   ParScanThreadState(Space* to_space_, ParNewGeneration* gen_,
                      Generation* old_gen_,
                      int thread_num_,
-                     ObjToScanQueueSet* work_queue_set_, size_t desired_plab_sz_,
+                     ObjToScanQueueSet* work_queue_set_,
+                     GrowableArray<oop>** overflow_stack_set_,
+                     size_t desired_plab_sz_,
                      ParallelTaskTerminator& term_);
 
 public:
@@ -296,9 +298,12 @@
     char pad[64 - sizeof(ObjToScanQueue)];  // prevent false sharing
   };
 
-  // The per-thread work queues, available here for stealing.
+  // The per-worker-thread work queues
   ObjToScanQueueSet* _task_queues;
 
+  // Per-worker-thread local overflow stacks
+  GrowableArray<oop>** _overflow_stacks;
+
   // Desired size of survivor space plab's
   PLABStats _plab_stats;
 
diff -r eae95c5579a4 -r fafab5d5349c src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri Apr 03 19:54:45 2009 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri Apr 03 20:12:54 2009 -0700
@@ -508,6 +508,7 @@
   assert(destination <= target_end, "sanity");
   assert(destination + _region_data[src_region].data_size() > target_end,
     "region should not fit into target space");
+  assert(is_region_aligned(target_end), "sanity");
 
   size_t split_region = src_region;
   HeapWord* split_destination = destination;
@@ -538,14 +539,12 @@
     //   max(top, max(new_top, clear_top))
     //
     // where clear_top is a new field in SpaceInfo.  Would have to set clear_top
-    // to destination + partial_obj_size, where both have the values passed to
-    // this routine.
+    // to target_end.
     const RegionData* const sr = region(split_region);
     const size_t beg_idx =
       addr_to_region_idx(region_align_up(sr->destination() +
                                          sr->partial_obj_size()));
-    const size_t end_idx =
-      addr_to_region_idx(region_align_up(destination + partial_obj_size));
+    const size_t end_idx = addr_to_region_idx(target_end);
 
     if (TraceParallelOldGCSummaryPhase) {
       gclog_or_tty->print_cr("split:  clearing source_region field in ["
diff -r eae95c5579a4 -r fafab5d5349c src/share/vm/runtime/arguments.cpp
--- a/src/share/vm/runtime/arguments.cpp	Fri Apr 03 19:54:45 2009 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Fri Apr 03 20:12:54 2009 -0700
@@ -969,7 +969,7 @@
   } else {
     no_shared_spaces();
 
-    // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024 correspondinly,
+    // By default YoungPLABSize and OldPLABSize are set to 4096 and 1024 respectively,
     // these settings are default for Parallel Scavenger. For ParNew+Tenured configuration
     // we set them to 1024 and 1024.
     // See CR 6362902.
@@ -985,6 +985,16 @@
   if (AlwaysTenure) {
     FLAG_SET_CMDLINE(intx, MaxTenuringThreshold, 0);
   }
+    // When using compressed oops, we use local overflow stacks,
+    // rather than using a global overflow list chained through
+    // the klass word of the object's pre-image.
+    if (UseCompressedOops && !ParGCUseLocalOverflow) {
+      if (!FLAG_IS_DEFAULT(ParGCUseLocalOverflow)) {
+        warning("Forcing +ParGCUseLocalOverflow: needed if using compressed references");
+      }
+      FLAG_SET_DEFAULT(ParGCUseLocalOverflow, true);
+    }
+    assert(ParGCUseLocalOverflow || !UseCompressedOops, "Error");
   }
 }
 
diff -r eae95c5579a4 -r fafab5d5349c src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Fri Apr 03 19:54:45 2009 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Apr 03 20:12:54 2009 -0700
@@ -1323,8 +1323,11 @@
   product(intx, ParGCArrayScanChunk, 50,                                    \
           "Scan a subset and push remainder, if array is bigger than this") \
                                                                             \
+  product(bool, ParGCUseLocalOverflow, false,                               \
+          "Instead of a global overflow list, use local overflow stacks")   \
+                                                                            \
   product(bool, ParGCTrimOverflow, true,                                    \
-          "Eagerly trim the overflow lists (useful for UseCompressedOops")  \
+          "Eagerly trim the local overflow lists (when ParGCUseLocalOverflow)") \
                                                                             \
   notproduct(bool, ParGCWorkQueueOverflowALot, false,                       \
           "Whether we should simulate work queue overflow in ParNew")       \
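The arguments.cpp hunk is a standard flag-ergonomics move: once compressed oops are on, the local-overflow mode becomes mandatory, so the VM forces the dependent flag, warning only if the user set it explicitly (FLAG_IS_DEFAULT distinguishes the two cases, since overriding an explicit choice deserves a message while correcting a default can be silent). A small self-contained C++ sketch of the warn-then-override pattern; Flag and apply_ergonomics are illustrative stand-ins, not HotSpot's flag machinery.

    // Sketch of the flag-ergonomics pattern, assuming illustrative names.
    #include <cstdio>

    struct Flag {
        bool value;
        bool set_on_command_line;  // FLAG_IS_DEFAULT() is the inverse of this
    };

    static Flag UseCompressedOops     = { true,  true  };
    static Flag ParGCUseLocalOverflow = { false, false };

    static void apply_ergonomics() {
        if (UseCompressedOops.value && !ParGCUseLocalOverflow.value) {
            if (ParGCUseLocalOverflow.set_on_command_line) {
                std::fprintf(stderr,
                    "warning: forcing +ParGCUseLocalOverflow: "
                    "needed if using compressed references\n");
            }
            ParGCUseLocalOverflow.value = true;  // FLAG_SET_DEFAULT in the patch
        }
    }

    int main() {
        apply_ergonomics();
        // The invariant the patch asserts after adjustment:
        return (ParGCUseLocalOverflow.value || !UseCompressedOops.value) ? 0 : 1;
    }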