Mercurial > hg > graal-compiler
changeset 828:b64314863098
Merge
author | kvn |
---|---|
date | Wed, 01 Jul 2009 15:06:54 -0700 |
parents | 32c83fb84370 (diff) bf3489cc0aa0 (current diff) |
children | e7d5557ad624 acba6af809c8 |
files | |
diffstat | 45 files changed, 713 insertions(+), 881 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgtags Wed Jul 01 12:22:23 2009 -0700 +++ b/.hgtags Wed Jul 01 15:06:54 2009 -0700 @@ -33,3 +33,7 @@ a3fd9e40ff2e854f6169eb6d09d491a28634d04f jdk7-b56 f4cbf78110c726919f46b59a3b054c54c7e889b4 jdk7-b57 53d9bf689e80fcc76b221bbe6c5d58e08b80cbc6 jdk7-b58 +c55be0c7bd32c016c52218eb4c8b5da8a75450b5 jdk7-b59 +a77eddcd510c3972717c025cfcef9a60bfa4ecac jdk7-b60 +27b728fd1281ab62e9d7e4424f8bbb6ca438d803 jdk7-b61 +a88386380bdaaa5ab4ffbedf22c57bac5dbec034 jdk7-b62
--- a/make/README Wed Jul 01 12:22:23 2009 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -README: - This file should be located at the top of the hotspot Mercurial repository. - - See http://openjdk.java.net/ for more information about the OpenJDK. - - See ../README-builds.html for complete details on build machine requirements. - -Simple Build Instructions: - - cd make && gnumake - - The files that will be imported into the jdk build will be in the "build" - directory. -
--- a/make/hotspot_version Wed Jul 01 12:22:23 2009 -0700 +++ b/make/hotspot_version Wed Jul 01 15:06:54 2009 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=16 HS_MINOR_VER=0 -HS_BUILD_NUMBER=03 +HS_BUILD_NUMBER=05 JDK_MAJOR_VER=1 JDK_MINOR_VER=7
--- a/src/cpu/sparc/vm/assembler_sparc.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -4454,43 +4454,26 @@ delayed()->nop(); } - // Now we decide how to generate the card table write. If we're - // enqueueing, we call out to a generated function. Otherwise, we do it - // inline here. - - if (G1RSBarrierUseQueue) { - // If the "store_addr" register is an "in" or "local" register, move it to - // a scratch reg so we can pass it as an argument. - bool use_scr = !(store_addr->is_global() || store_addr->is_out()); - // Pick a scratch register different from "tmp". - Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); - // Make sure we use up the delay slot! - if (use_scr) { - post_filter_masm->mov(store_addr, scr); - } else { - post_filter_masm->nop(); - } - generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); - save_frame(0); - call(dirty_card_log_enqueue); - if (use_scr) { - delayed()->mov(scr, O0); - } else { - delayed()->mov(store_addr->after_save(), O0); - } - restore(); - + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! 
+ if (use_scr) { + post_filter_masm->mov(store_addr, scr); } else { - -#ifdef _LP64 - post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); -#else - post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); -#endif - assert(tmp != store_addr, "need separate temp reg"); - set(bs->byte_map_base, tmp); - stb(G0, tmp, store_addr); + post_filter_masm->nop(); } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); bind(filtered);
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -555,6 +555,7 @@ _collector_policy(cp), _should_unload_classes(false), _concurrent_cycles_since_last_unload(0), + _roots_scanning_options(0), _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding) { if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -545,6 +545,11 @@ bool unloaded_classes_last_cycle() const { return concurrent_cycles_since_last_unload() == 0; } + // Root scanning options for perm gen + int _roots_scanning_options; + int roots_scanning_options() const { return _roots_scanning_options; } + void add_root_scanning_option(int o) { _roots_scanning_options |= o; } + void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o; } // Verification support CMSBitMap _verification_mark_bm; @@ -719,11 +724,6 @@ NOT_PRODUCT(bool simulate_overflow();) // sequential NOT_PRODUCT(bool par_simulate_overflow();) // MT version - int _roots_scanning_options; - int roots_scanning_options() const { return _roots_scanning_options; } - void add_root_scanning_option(int o) { _roots_scanning_options |= o; } - void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o; } - // CMS work methods void checkpointRootsInitialWork(bool asynch); // initial checkpoint work
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -25,26 +25,37 @@ #include "incls/_precompiled.incl" #include "incls/_concurrentG1Refine.cpp.incl" -bool ConcurrentG1Refine::_enabled = false; - ConcurrentG1Refine::ConcurrentG1Refine() : - _pya(PYA_continue), _last_pya(PYA_continue), - _last_cards_during(), _first_traversal(false), _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), _hot_cache(NULL), _def_use_cache(false), _use_cache(false), - _n_periods(0), _total_cards(0), _total_travs(0) + _n_periods(0), _total_cards(0), _total_travs(0), + _threads(NULL), _n_threads(0) { if (G1ConcRefine) { - _cg1rThread = new ConcurrentG1RefineThread(this); - assert(cg1rThread() != NULL, "Conc refine should have been created"); - assert(cg1rThread()->cg1r() == this, - "Conc refine thread should refer to this"); - } else { - _cg1rThread = NULL; + _n_threads = (int)thread_num(); + if (_n_threads > 0) { + _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); + int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); + ConcurrentG1RefineThread *next = NULL; + for (int i = _n_threads - 1; i >= 0; i--) { + ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); + assert(t != NULL, "Conc refine should have been created"); + assert(t->cg1r() == this, "Conc refine thread should refer to this"); + _threads[i] = t; + next = t; + } + } } } +size_t ConcurrentG1Refine::thread_num() { + if (G1ConcRefine) { + return (G1ParallelRSetThreads > 0) ? 
G1ParallelRSetThreads : ParallelGCThreads; + } + return 0; +} + void ConcurrentG1Refine::init() { if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { G1CollectedHeap* g1h = G1CollectedHeap::heap(); @@ -75,6 +86,14 @@ } } +void ConcurrentG1Refine::stop() { + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + _threads[i]->stop(); + } + } +} + ConcurrentG1Refine::~ConcurrentG1Refine() { if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { assert(_card_counts != NULL, "Logic"); @@ -88,104 +107,22 @@ assert(_hot_cache != NULL, "Logic"); FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); } -} - -bool ConcurrentG1Refine::refine() { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards(); - clear_hot_cache(); // Any previous values in this are now invalid. - g1h->g1_rem_set()->concurrentRefinementPass(this); - _traversals++; - unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards(); - unsigned cards_during = cards_after-cards_before; - // If this is the first traversal in the current enabling - // and we did some cards, or if the number of cards found is decreasing - // sufficiently quickly, then keep going. Otherwise, sleep a while. 
- bool res = - (_first_traversal && cards_during > 0) - || - (!_first_traversal && cards_during * 3 < _last_cards_during * 2); - _last_cards_during = cards_during; - _first_traversal = false; - return res; -} - -void ConcurrentG1Refine::enable() { - MutexLocker x(G1ConcRefine_mon); - if (!_enabled) { - _enabled = true; - _first_traversal = true; _last_cards_during = 0; - G1ConcRefine_mon->notify_all(); - } -} - -unsigned ConcurrentG1Refine::disable() { - MutexLocker x(G1ConcRefine_mon); - if (_enabled) { - _enabled = false; - return _traversals; - } else { - return 0; + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + delete _threads[i]; + } + FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads); } } -void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() { - G1ConcRefine_mon->lock(); - while (!_enabled) { - G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag); - } - G1ConcRefine_mon->unlock(); - _traversals = 0; -}; - -void ConcurrentG1Refine::set_pya_restart() { - // If we're using the log-based RS barrier, the above will cause - // in-progress traversals of completed log buffers to quit early; we will - // also abandon all other buffers. - if (G1RSBarrierUseQueue) { - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - dcqs.abandon_logs(); - // Reset the post-yield actions. 
- _pya = PYA_continue; - _last_pya = PYA_continue; - } else { - _pya = PYA_restart; +void ConcurrentG1Refine::threads_do(ThreadClosure *tc) { + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + tc->do_thread(_threads[i]); + } } } -void ConcurrentG1Refine::set_pya_cancel() { - _pya = PYA_cancel; -} - -PostYieldAction ConcurrentG1Refine::get_pya() { - if (_pya != PYA_continue) { - jint val = _pya; - while (true) { - jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val); - if (val_read == val) { - PostYieldAction res = (PostYieldAction)val; - assert(res != PYA_continue, "Only the refine thread should reset."); - _last_pya = res; - return res; - } else { - val = val_read; - } - } - } - // QQQ WELL WHAT DO WE RETURN HERE??? - // make up something! - return PYA_continue; -} - -PostYieldAction ConcurrentG1Refine::get_last_pya() { - PostYieldAction res = _last_pya; - _last_pya = PYA_continue; - return res; -} - -bool ConcurrentG1Refine::do_traversal() { - return _cg1rThread->do_traversal(); -} int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { size_t card_num = (card_ptr - _ct_bot);
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -26,26 +26,9 @@ class ConcurrentG1RefineThread; class G1RemSet; -// What to do after a yield: -enum PostYieldAction { - PYA_continue, // Continue the traversal - PYA_restart, // Restart - PYA_cancel // It's been completed by somebody else: cancel. -}; - class ConcurrentG1Refine: public CHeapObj { - ConcurrentG1RefineThread* _cg1rThread; - - volatile jint _pya; - PostYieldAction _last_pya; - - static bool _enabled; // Protected by G1ConcRefine_mon. - unsigned _traversals; - - // Number of cards processed during last refinement traversal. - unsigned _first_traversal; - unsigned _last_cards_during; - + ConcurrentG1RefineThread** _threads; + int _n_threads; // The cache for card refinement. bool _use_cache; bool _def_use_cache; @@ -74,37 +57,10 @@ ~ConcurrentG1Refine(); void init(); // Accomplish some initialization that has to wait. - - // Enabled Conc refinement, waking up thread if necessary. - void enable(); - - // Returns the number of traversals performed since this refiner was enabled. - unsigned disable(); - - // Requires G1ConcRefine_mon to be held. - bool enabled() { return _enabled; } - - // Returns only when G1 concurrent refinement has been enabled. - void wait_for_ConcurrentG1Refine_enabled(); + void stop(); - // Do one concurrent refinement pass over the card table. Returns "true" - // if heuristics determine that another pass should be done immediately. - bool refine(); - - // Indicate that an in-progress refinement pass should start over. - void set_pya_restart(); - // Indicate that an in-progress refinement pass should quit. - void set_pya_cancel(); - - // Get the appropriate post-yield action. Also sets last_pya. - PostYieldAction get_pya(); - - // The last PYA read by "get_pya". 
- PostYieldAction get_last_pya(); - - bool do_traversal(); - - ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; } + // Iterate over the conc refine threads + void threads_do(ThreadClosure *tc); // If this is the first entry for the slot, writes into the cache and // returns NULL. If it causes an eviction, returns the evicted pointer. @@ -129,4 +85,6 @@ void clear_and_record_card_counts(); void print_final_card_counts(); + + static size_t thread_num(); };
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -30,12 +30,14 @@ // The CM thread is created when the G1 garbage collector is used ConcurrentG1RefineThread:: -ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) : +ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next, + int worker_id_offset, int worker_id) : ConcurrentGCThread(), + _worker_id_offset(worker_id_offset), + _worker_id(worker_id), + _active(false), + _next(next), _cg1r(cg1r), - _started(false), - _in_progress(false), - _do_traversal(false), _vtime_accum(0.0), _co_tracker(G1CRGroup), _interval_ms(5.0) @@ -43,112 +45,6 @@ create_and_start(); } -const long timeout = 200; // ms. - -void ConcurrentG1RefineThread::traversalBasedRefinement() { - _cg1r->wait_for_ConcurrentG1Refine_enabled(); - MutexLocker x(G1ConcRefine_mon); - while (_cg1r->enabled()) { - MutexUnlocker ux(G1ConcRefine_mon); - ResourceMark rm; - HandleMark hm; - - if (G1TraceConcurrentRefinement) { - gclog_or_tty->print_cr("G1-Refine starting pass"); - } - _sts.join(); - bool no_sleep = _cg1r->refine(); - _sts.leave(); - if (!no_sleep) { - MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag); - // We do this only for the timeout; we don't expect this to be signalled. - CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout); - } - } -} - -void ConcurrentG1RefineThread::queueBasedRefinement() { - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - // Wait for completed log buffers to exist. - { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); - while (!_do_traversal && !dcqs.process_completed_buffers() && - !_should_terminate) { - DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); - } - } - - if (_should_terminate) { - return; - } - - // Now we take them off (this doesn't hold locks while it applies - // closures.) 
(If we did a full collection, then we'll do a full - // traversal. - _sts.join(); - if (_do_traversal) { - (void)_cg1r->refine(); - switch (_cg1r->get_last_pya()) { - case PYA_cancel: case PYA_continue: - // Continue was caught and handled inside "refine". If it's still - // "continue" when we get here, we're done. - _do_traversal = false; - break; - case PYA_restart: - assert(_do_traversal, "Because of Full GC."); - break; - } - } else { - int n_logs = 0; - int lower_limit = 0; - double start_vtime_sec; // only used when G1SmoothConcRefine is on - int prev_buffer_num; // only used when G1SmoothConcRefine is on - - if (G1SmoothConcRefine) { - lower_limit = 0; - start_vtime_sec = os::elapsedVTime(); - prev_buffer_num = (int) dcqs.completed_buffers_num(); - } else { - lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. - } - while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) { - double end_vtime_sec; - double elapsed_vtime_sec; - int elapsed_vtime_ms; - int curr_buffer_num; - - if (G1SmoothConcRefine) { - end_vtime_sec = os::elapsedVTime(); - elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; - elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); - curr_buffer_num = (int) dcqs.completed_buffers_num(); - - if (curr_buffer_num > prev_buffer_num || - curr_buffer_num > DCQBarrierProcessCompletedThreshold) { - decreaseInterval(elapsed_vtime_ms); - } else if (curr_buffer_num < prev_buffer_num) { - increaseInterval(elapsed_vtime_ms); - } - } - - sample_young_list_rs_lengths(); - _co_tracker.update(false); - - if (G1SmoothConcRefine) { - prev_buffer_num = curr_buffer_num; - _sts.leave(); - os::sleep(Thread::current(), (jlong) _interval_ms, false); - _sts.join(); - start_vtime_sec = os::elapsedVTime(); - } - n_logs++; - } - // Make sure we harvest the PYA, if any. 
- (void)_cg1r->get_pya(); - } - _sts.leave(); -} - void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectorPolicy* g1p = g1h->g1_policy(); @@ -184,15 +80,97 @@ _co_tracker.start(); while (!_should_terminate) { - // wait until started is set. - if (G1RSBarrierUseQueue) { - queueBasedRefinement(); + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + // Wait for completed log buffers to exist. + { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + while (((_worker_id == 0 && !dcqs.process_completed_buffers()) || + (_worker_id > 0 && !is_active())) && + !_should_terminate) { + DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); + } + } + + if (_should_terminate) { + return; + } + + // Now we take them off (this doesn't hold locks while it applies + // closures.) (If we did a full collection, then we'll do a full + // traversal. + _sts.join(); + int n_logs = 0; + int lower_limit = 0; + double start_vtime_sec; // only used when G1SmoothConcRefine is on + int prev_buffer_num; // only used when G1SmoothConcRefine is on + // This thread activation threshold + int threshold = DCQBarrierProcessCompletedThreshold * _worker_id; + // Next thread activation threshold + int next_threshold = threshold + DCQBarrierProcessCompletedThreshold; + int deactivation_threshold = MAX2<int>(threshold - DCQBarrierProcessCompletedThreshold / 2, 0); + + if (G1SmoothConcRefine) { + lower_limit = 0; + start_vtime_sec = os::elapsedVTime(); + prev_buffer_num = (int) dcqs.completed_buffers_num(); } else { - traversalBasedRefinement(); + lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. 
} - _sts.join(); - _co_tracker.update(); + while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) { + double end_vtime_sec; + double elapsed_vtime_sec; + int elapsed_vtime_ms; + int curr_buffer_num = (int) dcqs.completed_buffers_num(); + + if (G1SmoothConcRefine) { + end_vtime_sec = os::elapsedVTime(); + elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; + elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); + + if (curr_buffer_num > prev_buffer_num || + curr_buffer_num > next_threshold) { + decreaseInterval(elapsed_vtime_ms); + } else if (curr_buffer_num < prev_buffer_num) { + increaseInterval(elapsed_vtime_ms); + } + } + if (_worker_id == 0) { + sample_young_list_rs_lengths(); + } else if (curr_buffer_num < deactivation_threshold) { + // If the number of the buffer has fallen below our threshold + // we should deactivate. The predecessor will reactivate this + // thread should the number of the buffers cross the threshold again. + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + deactivate(); + if (G1TraceConcurrentRefinement) { + gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id); + } + break; + } + _co_tracker.update(false); + + // Check if we need to activate the next thread. 
+ if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) { + MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + _next->activate(); + DirtyCardQ_CBL_mon->notify_all(); + if (G1TraceConcurrentRefinement) { + gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id); + } + } + + if (G1SmoothConcRefine) { + prev_buffer_num = curr_buffer_num; + _sts.leave(); + os::sleep(Thread::current(), (jlong) _interval_ms, false); + _sts.join(); + start_vtime_sec = os::elapsedVTime(); + } + n_logs++; + } + _co_tracker.update(false); _sts.leave(); + if (os::supports_vtime()) { _vtime_accum = (os::elapsedVTime() - _vtime_start); } else { @@ -240,7 +218,3 @@ Thread::print(); gclog_or_tty->cr(); } - -void ConcurrentG1RefineThread::set_do_traversal(bool b) { - _do_traversal = b; -}
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -33,21 +33,27 @@ double _vtime_start; // Initial virtual time. double _vtime_accum; // Initial virtual time. + int _worker_id; + int _worker_id_offset; + // The refinement threads collection is linked list. A predecessor can activate a successor + // when the number of the rset update buffer crosses a certain threshold. A successor + // would self-deactivate when the number of the buffers falls below the threshold. + bool _active; + ConcurrentG1RefineThread * _next; public: virtual void run(); + bool is_active() { return _active; } + void activate() { _active = true; } + void deactivate() { _active = false; } + private: ConcurrentG1Refine* _cg1r; - bool _started; - bool _in_progress; - volatile bool _restart; COTracker _co_tracker; double _interval_ms; - bool _do_traversal; - void decreaseInterval(int processing_time_ms) { double min_interval_ms = (double) processing_time_ms; _interval_ms = 0.8 * _interval_ms; @@ -63,16 +69,13 @@ void sleepBeforeNextCycle(); - void traversalBasedRefinement(); - - void queueBasedRefinement(); - // For use by G1CollectedHeap, which is a friend. 
static SuspendibleThreadSet* sts() { return &_sts; } public: // Constructor - ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r); + ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next, + int worker_id_offset, int worker_id); // Printing void print(); @@ -82,23 +85,11 @@ ConcurrentG1Refine* cg1r() { return _cg1r; } - - void set_started() { _started = true; } - void clear_started() { _started = false; } - bool started() { return _started; } - - void set_in_progress() { _in_progress = true; } - void clear_in_progress() { _in_progress = false; } - bool in_progress() { return _in_progress; } - - void set_do_traversal(bool b); - bool do_traversal() { return _do_traversal; } - void sample_young_list_rs_lengths(); // Yield for GC void yield(); // shutdown - static void stop(); + void stop(); };
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -1157,6 +1157,13 @@ } else { // We're done with marking. JavaThread::satb_mark_queue_set().set_active_all_threads(false); + + if (VerifyDuringGC) { + g1h->prepare_for_verify(); + g1h->verify(/* allow_dirty */ true, + /* silent */ false, + /* use_prev_marking */ false); + } } #if VERIFY_OBJS_PROCESSED @@ -1747,12 +1754,12 @@ // races with it goes around and waits for completeCleanup to finish. g1h->increment_total_collections(); -#ifndef PRODUCT if (VerifyDuringGC) { - G1CollectedHeap::heap()->prepare_for_verify(); - G1CollectedHeap::heap()->verify(true,false); + g1h->prepare_for_verify(); + g1h->verify(/* allow_dirty */ true, + /* silent */ false, + /* use_prev_marking */ true); } -#endif } void ConcurrentMark::completeCleanup() {
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -80,5 +80,5 @@ void yield(); // shutdown - static void stop(); + void stop(); };
--- a/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -73,7 +73,7 @@ // while holding the ZF_needed_mon lock. // shutdown - static void stop(); + void stop(); // Stats static void note_region_alloc() {_region_allocs++; }
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -71,11 +71,11 @@ _all_active = true; } +// Determines how many mutator threads can process the buffers in parallel. size_t DirtyCardQueueSet::num_par_ids() { - return MAX2(ParallelGCThreads, (size_t)2); + return os::processor_count(); } - void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, int max_completed_queue, Mutex* lock, PtrQueueSet* fl_owner) { @@ -85,8 +85,6 @@ _shared_dirty_card_queue.set_lock(lock); _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon); - bool b = _free_ids->claim_perm_id(0); - guarantee(b, "Must reserve id zero for concurrent refinement thread."); } void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) { @@ -234,7 +232,7 @@ nd = get_completed_buffer_lock(stop_at); } bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); - if (res) _processed_buffers_rs_thread++; + if (res) Atomic::inc(&_processed_buffers_rs_thread); return res; }
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -446,8 +446,61 @@ gclog_or_tty->print_cr(""); } +void G1CollectedHeap::push_dirty_cards_region(HeapRegion* hr) +{ + // Claim the right to put the region on the dirty cards region list + // by installing a self pointer. + HeapRegion* next = hr->get_next_dirty_cards_region(); + if (next == NULL) { + HeapRegion* res = (HeapRegion*) + Atomic::cmpxchg_ptr(hr, hr->next_dirty_cards_region_addr(), + NULL); + if (res == NULL) { + HeapRegion* head; + do { + // Put the region to the dirty cards region list. + head = _dirty_cards_region_list; + next = (HeapRegion*) + Atomic::cmpxchg_ptr(hr, &_dirty_cards_region_list, head); + if (next == head) { + assert(hr->get_next_dirty_cards_region() == hr, + "hr->get_next_dirty_cards_region() != hr"); + if (next == NULL) { + // The last region in the list points to itself. + hr->set_next_dirty_cards_region(hr); + } else { + hr->set_next_dirty_cards_region(next); + } + } + } while (next != head); + } + } +} + +HeapRegion* G1CollectedHeap::pop_dirty_cards_region() +{ + HeapRegion* head; + HeapRegion* hr; + do { + head = _dirty_cards_region_list; + if (head == NULL) { + return NULL; + } + HeapRegion* new_head = head->get_next_dirty_cards_region(); + if (head == new_head) { + // The last region. + new_head = NULL; + } + hr = (HeapRegion*)Atomic::cmpxchg_ptr(new_head, &_dirty_cards_region_list, + head); + } while (hr != head); + assert(hr != NULL, "invariant"); + hr->set_next_dirty_cards_region(NULL); + return hr; +} + void G1CollectedHeap::stop_conc_gc_threads() { - _cg1r->cg1rThread()->stop(); + _cg1r->stop(); _czft->stop(); _cmThread->stop(); } @@ -1001,12 +1054,8 @@ gc_epilogue(true); - // Abandon concurrent refinement. This must happen last: in the - // dirty-card logging system, some cards may be dirty by weak-ref - // processing, and may be enqueued. 
But the whole card table is - // dirtied, so this should abandon those logs, and set "do_traversal" - // to true. - concurrent_g1_refine()->set_pya_restart(); + // Discard all rset updates + JavaThread::dirty_card_queue_set().abandon_logs(); assert(!G1DeferredRSUpdate || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any"); assert(regions_accounted_for(), "Region leakage!"); @@ -1333,7 +1382,8 @@ _gc_time_stamp(0), _surviving_young_words(NULL), _in_cset_fast_test(NULL), - _in_cset_fast_test_base(NULL) { + _in_cset_fast_test_base(NULL), + _dirty_cards_region_list(NULL) { _g1h = this; // To catch bugs. if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { vm_exit_during_initialization("Failed necessary allocation."); @@ -1485,6 +1535,15 @@ guarantee(_hrs != NULL, "Couldn't allocate HeapRegionSeq"); guarantee(_cur_alloc_region == NULL, "from constructor"); + // 6843694 - ensure that the maximum region index can fit + // in the remembered set structures. 
+ const size_t max_region_idx = ((size_t)1 << (sizeof(RegionIdx_t)*BitsPerByte-1)) - 1; + guarantee((max_regions() - 1) <= max_region_idx, "too many regions"); + + const size_t cards_per_region = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift; + size_t max_cards_per_region = ((size_t)1 << (sizeof(CardIdx_t)*BitsPerByte-1)) - 1; + guarantee(cards_per_region < max_cards_per_region, "too many cards per region"); + _bot_shared = new G1BlockOffsetSharedArray(_reserved, heap_word_size(init_byte_size)); @@ -1521,12 +1580,12 @@ SATB_Q_FL_lock, 0, Shared_SATB_Q_lock); - if (G1RSBarrierUseQueue) { - JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, - DirtyCardQ_FL_lock, - G1DirtyCardQueueMax, - Shared_DirtyCardQ_lock); - } + + JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + G1DirtyCardQueueMax, + Shared_DirtyCardQ_lock); + if (G1DeferredRSUpdate) { dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, DirtyCardQ_FL_lock, @@ -2077,17 +2136,22 @@ }; class VerifyObjsInRegionClosure: public ObjectClosure { +private: G1CollectedHeap* _g1h; size_t _live_bytes; HeapRegion *_hr; + bool _use_prev_marking; public: - VerifyObjsInRegionClosure(HeapRegion *hr) : _live_bytes(0), _hr(hr) { + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + VerifyObjsInRegionClosure(HeapRegion *hr, bool use_prev_marking) + : _live_bytes(0), _hr(hr), _use_prev_marking(use_prev_marking) { _g1h = G1CollectedHeap::heap(); } void do_object(oop o) { VerifyLivenessOopClosure isLive(_g1h); assert(o != NULL, "Huh?"); - if (!_g1h->is_obj_dead(o)) { + if (!_g1h->is_obj_dead_cond(o, _use_prev_marking)) { o->oop_iterate(&isLive); if (!_hr->obj_allocated_since_prev_marking(o)) _live_bytes += (o->size() * HeapWordSize); @@ -2126,17 +2190,22 @@ }; class VerifyRegionClosure: public HeapRegionClosure { -public: +private: bool _allow_dirty; bool _par; - VerifyRegionClosure(bool 
allow_dirty, bool par = false) - : _allow_dirty(allow_dirty), _par(par) {} + bool _use_prev_marking; +public: + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + VerifyRegionClosure(bool allow_dirty, bool par, bool use_prev_marking) + : _allow_dirty(allow_dirty), _par(par), + _use_prev_marking(use_prev_marking) {} bool doHeapRegion(HeapRegion* r) { guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue, "Should be unclaimed at verify points."); if (!r->continuesHumongous()) { - VerifyObjsInRegionClosure not_dead_yet_cl(r); - r->verify(_allow_dirty); + VerifyObjsInRegionClosure not_dead_yet_cl(r, _use_prev_marking); + r->verify(_allow_dirty, _use_prev_marking); r->object_iterate(&not_dead_yet_cl); guarantee(r->max_live_bytes() >= not_dead_yet_cl.live_bytes(), "More live objects than counted in last complete marking."); @@ -2149,10 +2218,13 @@ private: G1CollectedHeap* _g1h; bool _failures; - + bool _use_prev_marking; public: - VerifyRootsClosure() : - _g1h(G1CollectedHeap::heap()), _failures(false) { } + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + VerifyRootsClosure(bool use_prev_marking) : + _g1h(G1CollectedHeap::heap()), _failures(false), + _use_prev_marking(use_prev_marking) { } bool failures() { return _failures; } @@ -2163,7 +2235,7 @@ void do_oop(oop* p) { oop obj = *p; if (obj != NULL) { - if (_g1h->is_obj_dead(obj)) { + if (_g1h->is_obj_dead_cond(obj, _use_prev_marking)) { gclog_or_tty->print_cr("Root location "PTR_FORMAT" " "points to dead obj "PTR_FORMAT, p, (void*) obj); obj->print_on(gclog_or_tty); @@ -2179,24 +2251,35 @@ private: G1CollectedHeap* _g1h; bool _allow_dirty; + bool _use_prev_marking; public: - G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty) : + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking 
information + G1ParVerifyTask(G1CollectedHeap* g1h, bool allow_dirty, + bool use_prev_marking) : AbstractGangTask("Parallel verify task"), - _g1h(g1h), _allow_dirty(allow_dirty) { } + _g1h(g1h), _allow_dirty(allow_dirty), + _use_prev_marking(use_prev_marking) { } void work(int worker_i) { HandleMark hm; - VerifyRegionClosure blk(_allow_dirty, true); + VerifyRegionClosure blk(_allow_dirty, true, _use_prev_marking); _g1h->heap_region_par_iterate_chunked(&blk, worker_i, HeapRegion::ParVerifyClaimValue); } }; void G1CollectedHeap::verify(bool allow_dirty, bool silent) { + verify(allow_dirty, silent, /* use_prev_marking */ true); +} + +void G1CollectedHeap::verify(bool allow_dirty, + bool silent, + bool use_prev_marking) { if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) { if (!silent) { gclog_or_tty->print("roots "); } - VerifyRootsClosure rootsCl; + VerifyRootsClosure rootsCl(use_prev_marking); process_strong_roots(false, SharedHeap::SO_AllClasses, &rootsCl, @@ -2207,7 +2290,7 @@ assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity check"); - G1ParVerifyTask task(this, allow_dirty); + G1ParVerifyTask task(this, allow_dirty, use_prev_marking); int n_workers = workers()->total_workers(); set_par_threads(n_workers); workers()->run_task(&task); @@ -2221,7 +2304,7 @@ assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity check"); } else { - VerifyRegionClosure blk(allow_dirty); + VerifyRegionClosure blk(allow_dirty, false, use_prev_marking); _hrs->iterate(&blk); } if (!silent) gclog_or_tty->print("remset "); @@ -2249,6 +2332,15 @@ _hrs->iterate(&blk); } +class PrintOnThreadsClosure : public ThreadClosure { + outputStream* _st; +public: + PrintOnThreadsClosure(outputStream* st) : _st(st) { } + virtual void do_thread(Thread *t) { + t->print_on(_st); + } +}; + void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { if (ParallelGCThreads > 0) { workers()->print_worker_threads(); @@ -2256,8 +2348,9 @@ 
st->print("\"G1 concurrent mark GC Thread\" "); _cmThread->print(); st->cr(); - st->print("\"G1 concurrent refinement GC Thread\" "); - _cg1r->cg1rThread()->print_on(st); + st->print("\"G1 concurrent refinement GC Threads\" "); + PrintOnThreadsClosure p(st); + _cg1r->threads_do(&p); st->cr(); st->print("\"G1 zero-fill GC Thread\" "); _czft->print_on(st); @@ -2269,7 +2362,7 @@ workers()->threads_do(tc); } tc->do_thread(_cmThread); - tc->do_thread(_cg1r->cg1rThread()); + _cg1r->threads_do(tc); tc->do_thread(_czft); } @@ -4685,15 +4778,58 @@ } } + +class G1ParCleanupCTTask : public AbstractGangTask { + CardTableModRefBS* _ct_bs; + G1CollectedHeap* _g1h; +public: + G1ParCleanupCTTask(CardTableModRefBS* ct_bs, + G1CollectedHeap* g1h) : + AbstractGangTask("G1 Par Cleanup CT Task"), + _ct_bs(ct_bs), + _g1h(g1h) + { } + + void work(int i) { + HeapRegion* r; + while (r = _g1h->pop_dirty_cards_region()) { + clear_cards(r); + } + } + void clear_cards(HeapRegion* r) { + // Cards for Survivor and Scan-Only regions will be dirtied later. + if (!r->is_scan_only() && !r->is_survivor()) { + _ct_bs->clear(MemRegion(r->bottom(), r->end())); + } + } +}; + + void G1CollectedHeap::cleanUpCardTable() { CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set()); double start = os::elapsedTime(); - ct_bs->clear(_g1_committed); - + // Iterate over the dirty cards region list. + G1ParCleanupCTTask cleanup_task(ct_bs, this); + if (ParallelGCThreads > 0) { + set_par_threads(workers()->total_workers()); + workers()->run_task(&cleanup_task); + set_par_threads(0); + } else { + while (_dirty_cards_region_list) { + HeapRegion* r = _dirty_cards_region_list; + cleanup_task.clear_cards(r); + _dirty_cards_region_list = r->get_next_dirty_cards_region(); + if (_dirty_cards_region_list == r) { + // The last region. 
+ _dirty_cards_region_list = NULL; + } + r->set_next_dirty_cards_region(NULL); + } + } // now, redirty the cards of the scan-only and survivor regions // (it seemed faster to do it this way, instead of iterating over - // all regions and then clearing / dirtying as approprite) + // all regions and then clearing / dirtying as appropriate) dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region()); dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region());
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -59,6 +59,9 @@ typedef GenericTaskQueue<oop*> RefToScanQueue; typedef GenericTaskQueueSet<oop*> RefToScanQueueSet; +typedef int RegionIdx_t; // needs to hold [ 0..max_regions() ) +typedef int CardIdx_t; // needs to hold [ 0..CardsPerRegion ) + enum G1GCThreadGroups { G1CRGroup = 0, G1ZFGroup = 1, @@ -158,6 +161,7 @@ friend class RegionSorter; friend class CountRCClosure; friend class EvacPopObjClosure; + friend class G1ParCleanupCTTask; // Other related classes. friend class G1MarkSweep; @@ -1045,6 +1049,17 @@ virtual void prepare_for_verify(); // Perform verification. + + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + // NOTE: Only the "prev" marking information is guaranteed to be + // consistent most of the time, so most calls to this should use + // use_prev_marking == true. Currently, there is only one case where + // this is called with use_prev_marking == false, which is to verify + // the "next" marking information at the end of remark. 
+ void verify(bool allow_dirty, bool silent, bool use_prev_marking); + + // Override; it uses the "prev" marking information virtual void verify(bool allow_dirty, bool silent); virtual void print() const; virtual void print_on(outputStream* st) const; @@ -1121,6 +1136,18 @@ bool isMarkedPrev(oop obj) const; bool isMarkedNext(oop obj) const; + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + bool is_obj_dead_cond(const oop obj, + const HeapRegion* hr, + const bool use_prev_marking) const { + if (use_prev_marking) { + return is_obj_dead(obj, hr); + } else { + return is_obj_ill(obj, hr); + } + } + // Determine if an object is dead, given the object and also // the region to which the object belongs. An object is dead // iff a) it was not allocated since the last mark and b) it @@ -1158,8 +1185,19 @@ // Added if it is in permanent gen it isn't dead. // Added if it is NULL it isn't dead. - bool is_obj_dead(oop obj) { - HeapRegion* hr = heap_region_containing(obj); + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + bool is_obj_dead_cond(const oop obj, + const bool use_prev_marking) { + if (use_prev_marking) { + return is_obj_dead(obj); + } else { + return is_obj_ill(obj); + } + } + + bool is_obj_dead(const oop obj) { + const HeapRegion* hr = heap_region_containing(obj); if (hr == NULL) { if (Universe::heap()->is_in_permanent(obj)) return false; @@ -1169,8 +1207,8 @@ else return is_obj_dead(obj, hr); } - bool is_obj_ill(oop obj) { - HeapRegion* hr = heap_region_containing(obj); + bool is_obj_ill(const oop obj) { + const HeapRegion* hr = heap_region_containing(obj); if (hr == NULL) { if (Universe::heap()->is_in_permanent(obj)) return false; @@ -1191,6 +1229,16 @@ ConcurrentMark* concurrent_mark() const { return _cm; } ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; } + // The dirty cards region 
list is used to record a subset of regions + // whose cards need clearing. The list if populated during the + // remembered set scanning and drained during the card table + // cleanup. Although the methods are reentrant, population/draining + // phases must not overlap. For synchronization purposes the last + // element on the list points to itself. + HeapRegion* _dirty_cards_region_list; + void push_dirty_cards_region(HeapRegion* hr); + HeapRegion* pop_dirty_cards_region(); + public: void stop_conc_gc_threads();
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -167,11 +167,6 @@ _all_full_gc_times_ms(new NumberSeq()), - _conc_refine_enabled(0), - _conc_refine_zero_traversals(0), - _conc_refine_max_traversals(0), - _conc_refine_current_delta(G1ConcRefineInitialDelta), - // G1PausesBtwnConcMark defaults to -1 // so the hack is to do the cast QQQ FIXME _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), @@ -1634,9 +1629,8 @@ print_stats(1, "Parallel Time", _cur_collection_par_time_ms); print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); - if (G1RSBarrierUseQueue) - print_par_buffers(3, "Processed Buffers", - _par_last_update_rs_processed_buffers, true); + print_par_buffers(3, "Processed Buffers", + _par_last_update_rs_processed_buffers, true); print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms); print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms); print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms); @@ -1649,9 +1643,8 @@ print_stats(1, "Clear CT", _cur_clear_ct_time_ms); } else { print_stats(1, "Update RS", update_rs_time); - if (G1RSBarrierUseQueue) - print_stats(2, "Processed Buffers", - (int)update_rs_processed_buffers); + print_stats(2, "Processed Buffers", + (int)update_rs_processed_buffers); print_stats(1, "Ext Root Scanning", ext_root_scan_time); print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); print_stats(1, "Scan-Only Scanning", scan_only_time); @@ -2467,18 +2460,6 @@ (double) _region_num_young / (double) all_region_num * 100.0, _region_num_tenured, (double) _region_num_tenured / (double) all_region_num * 100.0); - - if (!G1RSBarrierUseQueue) { - gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) " - "did zero traversals.", - 
_conc_refine_enabled, _conc_refine_zero_traversals, - _conc_refine_enabled > 0 ? - 100.0 * (float)_conc_refine_zero_traversals/ - (float)_conc_refine_enabled : 0.0); - gclog_or_tty->print_cr(" Max # of traversals = %d.", - _conc_refine_max_traversals); - gclog_or_tty->print_cr(""); - } } if (TraceGen1Time) { if (_all_full_gc_times_ms->num() > 0) { @@ -2500,38 +2481,6 @@ #endif // PRODUCT } -void G1CollectorPolicy::update_conc_refine_data() { - unsigned traversals = _g1->concurrent_g1_refine()->disable(); - if (traversals == 0) _conc_refine_zero_traversals++; - _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals, - (size_t)traversals); - - if (G1PolicyVerbose > 1) - gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals); - double multiplier = 1.0; - if (traversals == 0) { - multiplier = 4.0; - } else if (traversals > (size_t)G1ConcRefineTargTraversals) { - multiplier = 1.0/1.5; - } else if (traversals < (size_t)G1ConcRefineTargTraversals) { - multiplier = 1.5; - } - if (G1PolicyVerbose > 1) { - gclog_or_tty->print_cr(" Multiplier = %7.2f.", multiplier); - gclog_or_tty->print(" Delta went from %d regions to ", - _conc_refine_current_delta); - } - _conc_refine_current_delta = - MIN2(_g1->n_regions(), - (size_t)(_conc_refine_current_delta * multiplier)); - _conc_refine_current_delta = - MAX2(_conc_refine_current_delta, (size_t)1); - if (G1PolicyVerbose > 1) { - gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta); - } - _conc_refine_enabled++; -} - bool G1CollectorPolicy::should_add_next_region_to_young_list() { assert(in_young_gc_mode(), "should be in young GC mode");
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -637,18 +637,6 @@ // The number of collection pauses at the end of the last mark. size_t _n_pauses_at_mark_end; - // ==== This section is for stats related to starting Conc Refinement on time. - size_t _conc_refine_enabled; - size_t _conc_refine_zero_traversals; - size_t _conc_refine_max_traversals; - // In # of heap regions. - size_t _conc_refine_current_delta; - - // At the beginning of a collection pause, update the variables above, - // especially the "delta". - void update_conc_refine_data(); - // ==== - // Stash a pointer to the g1 heap. G1CollectedHeap* _g1;
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -105,28 +105,6 @@ _g1->heap_region_iterate(&rc); } -class UpdateRSOutOfRegionClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - ModRefBarrierSet* _mr_bs; - UpdateRSOopClosure _cl; - int _worker_i; -public: - UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : - _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i), - _mr_bs(g1->mr_bs()), - _worker_i(worker_i), - _g1h(g1) - {} - bool doHeapRegion(HeapRegion* r) { - if (!r->in_collection_set() && !r->continuesHumongous()) { - _cl.set_from(r); - r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind); - _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true); - } - return false; - } -}; - class VerifyRSCleanCardOopClosure: public OopClosure { G1CollectedHeap* _g1; public: @@ -241,6 +219,7 @@ HeapRegionRemSet* hrrs = r->rem_set(); if (hrrs->iter_is_complete()) return false; // All done. if (!_try_claimed && !hrrs->claim_iter()) return false; + _g1h->push_dirty_cards_region(r); // If we didn't return above, then // _try_claimed || r->claim_iter() // is true: either we're supposed to work on claimed-but-not-complete @@ -264,6 +243,10 @@ assert(card_region != NULL, "Yielding cards not in the heap?"); _cards++; + if (!card_region->is_on_dirty_cards_region_list()) { + _g1h->push_dirty_cards_region(card_region); + } + // If the card is dirty, then we will scan it during updateRS. if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) { if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) { @@ -350,30 +333,17 @@ double start = os::elapsedTime(); _g1p->record_update_rs_start_time(worker_i, start * 1000.0); - if (G1RSBarrierUseQueue && !cg1r->do_traversal()) { - // Apply the appropriate closure to all remaining log entries. 
- _g1->iterate_dirty_card_closure(false, worker_i); - // Now there should be no dirty cards. - if (G1RSLogCheckCardTable) { - CountNonCleanMemRegionClosure cl(_g1); - _ct_bs->mod_card_iterate(&cl); - // XXX This isn't true any more: keeping cards of young regions - // marked dirty broke it. Need some reasonable fix. - guarantee(cl.n() == 0, "Card table should be clean."); - } - } else { - UpdateRSOutOfRegionClosure update_rs(_g1, worker_i); - _g1->heap_region_iterate(&update_rs); - // We did a traversal; no further one is necessary. - if (G1RSBarrierUseQueue) { - assert(cg1r->do_traversal(), "Or we shouldn't have gotten here."); - cg1r->set_pya_cancel(); - } - if (_cg1r->use_cache()) { - _cg1r->clear_and_record_card_counts(); - _cg1r->clear_hot_cache(); - } + // Apply the appropriate closure to all remaining log entries. + _g1->iterate_dirty_card_closure(false, worker_i); + // Now there should be no dirty cards. + if (G1RSLogCheckCardTable) { + CountNonCleanMemRegionClosure cl(_g1); + _ct_bs->mod_card_iterate(&cl); + // XXX This isn't true any more: keeping cards of young regions + // marked dirty broke it. Need some reasonable fix. 
+ guarantee(cl.n() == 0, "Card table should be clean."); } + _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); } @@ -486,11 +456,6 @@ * 1000.0); } -void HRInto_G1RemSet::set_par_traversal(bool b) { - _par_traversal_in_progress = b; - HeapRegionRemSet::set_par_traversal(b); -} - void HRInto_G1RemSet::cleanupHRRS() { HeapRegionRemSet::cleanup(); } @@ -527,7 +492,7 @@ updateRS(worker_i); scanNewRefsRS(oc, worker_i); } else { - _g1p->record_update_rs_start_time(worker_i, os::elapsedTime()); + _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0); _g1p->record_update_rs_processed_buffers(worker_i, 0.0); _g1p->record_update_rs_time(worker_i, 0.0); _g1p->record_scan_new_refs_time(worker_i, 0.0); @@ -535,7 +500,7 @@ if (G1ParallelRSetScanningEnabled || (worker_i == 0)) { scanRS(oc, worker_i); } else { - _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime()); + _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0); _g1p->record_scan_rs_time(worker_i, 0.0); } } else { @@ -562,11 +527,6 @@ if (ParallelGCThreads > 0) { set_par_traversal(true); _seq_task->set_par_threads((int)n_workers()); - if (cg1r->do_traversal()) { - updateRS(0); - // Have to do this again after updaters - cleanupHRRS(); - } } guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); @@ -647,11 +607,8 @@ _g1->collection_set_iterate(&iterClosure); // Set all cards back to clean. 
_g1->cleanUpCardTable(); + if (ParallelGCThreads > 0) { - ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); - if (cg1r->do_traversal()) { - cg1r->cg1rThread()->set_do_traversal(false); - } set_par_traversal(false); } @@ -721,139 +678,8 @@ } -class ConcRefineRegionClosure: public HeapRegionClosure { - G1CollectedHeap* _g1h; - CardTableModRefBS* _ctbs; - ConcurrentGCThread* _cgc_thrd; - ConcurrentG1Refine* _cg1r; - unsigned _cards_processed; - UpdateRSOopClosure _update_rs_oop_cl; -public: - ConcRefineRegionClosure(CardTableModRefBS* ctbs, - ConcurrentG1Refine* cg1r, - HRInto_G1RemSet* g1rs) : - _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()), - _update_rs_oop_cl(g1rs), _cards_processed(0), - _g1h(G1CollectedHeap::heap()) - {} - - bool doHeapRegion(HeapRegion* r) { - if (!r->in_collection_set() && - !r->continuesHumongous() && - !r->is_young()) { - _update_rs_oop_cl.set_from(r); - UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); - - // For each run of dirty card in the region: - // 1) Clear the cards. - // 2) Process the range corresponding to the run, adding any - // necessary RS entries. - // 1 must precede 2, so that a concurrent modification redirties the - // card. If a processing attempt does not succeed, because it runs - // into an unparseable region, we will do binary search to find the - // beginning of the next parseable region. 
- HeapWord* startAddr = r->bottom(); - HeapWord* endAddr = r->used_region().end(); - HeapWord* lastAddr; - HeapWord* nextAddr; - - for (nextAddr = lastAddr = startAddr; - nextAddr < endAddr; - nextAddr = lastAddr) { - MemRegion dirtyRegion; - - // Get and clear dirty region from card table - MemRegion next_mr(nextAddr, endAddr); - dirtyRegion = - _ctbs->dirty_card_range_after_reset( - next_mr, - true, CardTableModRefBS::clean_card_val()); - assert(dirtyRegion.start() >= nextAddr, - "returned region inconsistent?"); - - if (!dirtyRegion.is_empty()) { - HeapWord* stop_point = - r->object_iterate_mem_careful(dirtyRegion, - &update_rs_obj_cl); - if (stop_point == NULL) { - lastAddr = dirtyRegion.end(); - _cards_processed += - (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words); - } else { - // We're going to skip one or more cards that we can't parse. - HeapWord* next_parseable_card = - r->next_block_start_careful(stop_point); - // Round this up to a card boundary. - next_parseable_card = - _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card)); - // Now we invalidate the intervening cards so we'll see them - // again. - MemRegion remaining_dirty = - MemRegion(stop_point, dirtyRegion.end()); - MemRegion skipped = - MemRegion(stop_point, next_parseable_card); - _ctbs->invalidate(skipped.intersection(remaining_dirty)); - - // Now start up again where we can parse. - lastAddr = next_parseable_card; - - // Count how many we did completely. - _cards_processed += - (stop_point - dirtyRegion.start()) / - CardTableModRefBS::card_size_in_words; - } - // Allow interruption at regular intervals. - // (Might need to make them more regular, if we get big - // dirty regions.) - if (_cgc_thrd != NULL) { - if (_cgc_thrd->should_yield()) { - _cgc_thrd->yield(); - switch (_cg1r->get_pya()) { - case PYA_continue: - // This may have changed: re-read. 
- endAddr = r->used_region().end(); - continue; - case PYA_restart: case PYA_cancel: - return true; - } - } - } - } else { - break; - } - } - } - // A good yield opportunity. - if (_cgc_thrd != NULL) { - if (_cgc_thrd->should_yield()) { - _cgc_thrd->yield(); - switch (_cg1r->get_pya()) { - case PYA_restart: case PYA_cancel: - return true; - default: - break; - } - - } - } - return false; - } - - unsigned cards_processed() { return _cards_processed; } -}; - - -void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) { - ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this); - _g1->heap_region_iterate(&cr_cl); - _conc_refine_traversals++; - _conc_refine_cards += cr_cl.cards_processed(); -} - static IntHistogram out_of_histo(50, 50); - - void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // If the card is no longer dirty, nothing to do. if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; @@ -983,10 +809,16 @@ HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } }; +class PrintRSThreadVTimeClosure : public ThreadClosure { +public: + virtual void do_thread(Thread *t) { + ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t; + gclog_or_tty->print(" %5.2f", crt->vtime_accum()); + } +}; + void HRInto_G1RemSet::print_summary_info() { G1CollectedHeap* g1 = G1CollectedHeap::heap(); - ConcurrentG1RefineThread* cg1r_thrd = - g1->concurrent_g1_refine()->cg1rThread(); #if CARD_REPEAT_HISTO gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); @@ -999,15 +831,13 @@ gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number."); out_of_histo.print_on(gclog_or_tty); } - gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in " - "%5.2fs.", - _conc_refine_cards, cg1r_thrd->vtime_accum()); - + gclog_or_tty->print_cr("\n Concurrent RS processed %d cards", + _conc_refine_cards); DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); jint tot_processed_buffers = 
dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); - gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.", + gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.", dcqs.processed_buffers_rs_thread(), 100.0*(float)dcqs.processed_buffers_rs_thread()/ (float)tot_processed_buffers); @@ -1015,15 +845,12 @@ dcqs.processed_buffers_mut(), 100.0*(float)dcqs.processed_buffers_mut()/ (float)tot_processed_buffers); - gclog_or_tty->print_cr(" Did %d concurrent refinement traversals.", - _conc_refine_traversals); - if (!G1RSBarrierUseQueue) { - gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.", - _conc_refine_traversals > 0 ? - (float)_conc_refine_cards/(float)_conc_refine_traversals : - 0); - } + gclog_or_tty->print_cr(" Conc RS threads times(s)"); + PrintRSThreadVTimeClosure p; + gclog_or_tty->print(" "); + g1->concurrent_g1_refine()->threads_do(&p); gclog_or_tty->print_cr(""); + if (G1UseHRIntoRS) { HRRSStatsIter blk; g1->heap_region_iterate(&blk);
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -33,15 +33,12 @@ class G1RemSet: public CHeapObj { protected: G1CollectedHeap* _g1; - - unsigned _conc_refine_traversals; unsigned _conc_refine_cards; - size_t n_workers(); public: G1RemSet(G1CollectedHeap* g1) : - _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0) + _g1(g1), _conc_refine_cards(0) {} // Invoke "blk->do_oop" on all pointers into the CS in object in regions @@ -81,19 +78,11 @@ virtual void scrub_par(BitMap* region_bm, BitMap* card_bm, int worker_num, int claim_val) = 0; - // Do any "refinement" activity that might be appropriate to the given - // G1RemSet. If "refinement" has iterateive "passes", do one pass. - // If "t" is non-NULL, it is the thread performing the refinement. - // Default implementation does nothing. - virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {} - // Refine the card corresponding to "card_ptr". If "sts" is non-NULL, // join and leave around parts that must be atomic wrt GC. (NULL means // being done at a safepoint.) virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {} - unsigned conc_refine_cards() { return _conc_refine_cards; } - // Print any relevant summary info. virtual void print_summary_info() {} @@ -153,7 +142,7 @@ // progress. If so, then cards added to remembered sets should also have // their references into the collection summarized in "_new_refs". 
bool _par_traversal_in_progress; - void set_par_traversal(bool b); + void set_par_traversal(bool b) { _par_traversal_in_progress = b; } GrowableArray<oop*>** _new_refs; void new_refs_iterate(OopClosure* cl); @@ -194,7 +183,6 @@ void scrub_par(BitMap* region_bm, BitMap* card_bm, int worker_num, int claim_val); - virtual void concurrentRefinementPass(ConcurrentG1Refine* t); virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i); virtual void print_summary_info();
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -147,9 +147,6 @@ develop(bool, G1PrintCTFilterStats, false, \ "If true, print stats on RS filtering effectiveness") \ \ - develop(bool, G1RSBarrierUseQueue, true, \ - "If true, use queueing RS barrier") \ - \ develop(bool, G1DeferredRSUpdate, true, \ "If true, use deferred RS updates") \ \ @@ -253,6 +250,10 @@ \ experimental(bool, G1ParallelRSetScanningEnabled, false, \ "Enables the parallelization of remembered set scanning " \ - "during evacuation pauses") + "during evacuation pauses") \ + \ + product(uintx, G1ParallelRSetThreads, 0, \ + "If non-0 is the number of parallel rem set update threads, " \ + "otherwise the value is determined ergonomically.") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -40,15 +40,19 @@ {} class VerifyLiveClosure: public OopClosure { +private: G1CollectedHeap* _g1h; CardTableModRefBS* _bs; oop _containing_obj; bool _failures; int _n_failures; + bool _use_prev_marking; public: - VerifyLiveClosure(G1CollectedHeap* g1h) : + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + VerifyLiveClosure(G1CollectedHeap* g1h, bool use_prev_marking) : _g1h(g1h), _bs(NULL), _containing_obj(NULL), - _failures(false), _n_failures(0) + _failures(false), _n_failures(0), _use_prev_marking(use_prev_marking) { BarrierSet* bs = _g1h->barrier_set(); if (bs->is_a(BarrierSet::CardTableModRef)) @@ -68,11 +72,13 @@ void do_oop(oop* p) { assert(_containing_obj != NULL, "Precondition"); - assert(!_g1h->is_obj_dead(_containing_obj), "Precondition"); + assert(!_g1h->is_obj_dead_cond(_containing_obj, _use_prev_marking), + "Precondition"); oop obj = *p; if (obj != NULL) { bool failed = false; - if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) { + if (!_g1h->is_in_closed_subset(obj) || + _g1h->is_obj_dead_cond(obj, _use_prev_marking)) { if (!_failures) { gclog_or_tty->print_cr(""); gclog_or_tty->print_cr("----------"); @@ -351,6 +357,7 @@ _claimed(InitialClaimValue), _evacuation_failed(false), _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1), _young_type(NotYoung), _next_young_region(NULL), + _next_dirty_cards_region(NULL), _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1), _rem_set(NULL), _zfs(NotZeroFilled) { @@ -646,19 +653,23 @@ G1OffsetTableContigSpace::print_on(st); } +void HeapRegion::verify(bool allow_dirty) const { + verify(allow_dirty, /* use_prev_marking */ true); +} + #define OBJ_SAMPLE_INTERVAL 0 #define BLOCK_SAMPLE_INTERVAL 100 // This really ought to be commoned 
up into OffsetTableContigSpace somehow. // We would need a mechanism to make that code skip dead objects. -void HeapRegion::verify(bool allow_dirty) const { +void HeapRegion::verify(bool allow_dirty, bool use_prev_marking) const { G1CollectedHeap* g1 = G1CollectedHeap::heap(); HeapWord* p = bottom(); HeapWord* prev_p = NULL; int objs = 0; int blocks = 0; - VerifyLiveClosure vl_cl(g1); + VerifyLiveClosure vl_cl(g1, use_prev_marking); while (p < top()) { size_t size = oop(p)->size(); if (blocks == BLOCK_SAMPLE_INTERVAL) { @@ -670,7 +681,7 @@ } if (objs == OBJ_SAMPLE_INTERVAL) { oop obj = oop(p); - if (!g1->is_obj_dead(obj, this)) { + if (!g1->is_obj_dead_cond(obj, this, use_prev_marking)) { obj->verify(); vl_cl.set_containing_obj(obj); obj->oop_iterate(&vl_cl);
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -227,6 +227,9 @@ // next region in the young "generation" region set HeapRegion* _next_young_region; + // Next region whose cards need cleaning + HeapRegion* _next_dirty_cards_region; + // For parallel heapRegion traversal. jint _claimed; @@ -468,6 +471,11 @@ _next_young_region = hr; } + HeapRegion* get_next_dirty_cards_region() const { return _next_dirty_cards_region; } + HeapRegion** next_dirty_cards_region_addr() { return &_next_dirty_cards_region; } + void set_next_dirty_cards_region(HeapRegion* hr) { _next_dirty_cards_region = hr; } + bool is_on_dirty_cards_region_list() const { return get_next_dirty_cards_region() != NULL; } + // Allows logical separation between objects allocated before and after. void save_marks(); @@ -774,7 +782,16 @@ void print() const; void print_on(outputStream* st) const; - // Override + // use_prev_marking == true -> use "prev" marking information, + // use_prev_marking == false -> use "next" marking information + // NOTE: Only the "prev" marking information is guaranteed to be + // consistent most of the time, so most calls to this should use + // use_prev_marking == true. Currently, there is only one case where + // this is called with use_prev_marking == false, which is to verify + // the "next" marking information at the end of remark. + void verify(bool allow_dirty, bool use_prev_marking) const; + + // Override; it uses the "prev" marking information virtual void verify(bool allow_dirty) const; #ifdef DEBUG
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -109,7 +109,7 @@ return new PerRegionTable(hr); } - void add_card_work(short from_card, bool par) { + void add_card_work(CardIdx_t from_card, bool par) { if (!_bm.at(from_card)) { if (par) { if (_bm.par_at_put(from_card, 1)) { @@ -141,11 +141,11 @@ // and adding a bit to the new table is never incorrect. if (loc_hr->is_in_reserved(from)) { size_t hw_offset = pointer_delta((HeapWord*)from, loc_hr->bottom()); - size_t from_card = - hw_offset >> - (CardTableModRefBS::card_shift - LogHeapWordSize); + CardIdx_t from_card = (CardIdx_t) + hw_offset >> (CardTableModRefBS::card_shift - LogHeapWordSize); - add_card_work((short) from_card, par); + assert(0 <= from_card && from_card < CardsPerRegion, "Must be in range."); + add_card_work(from_card, par); } } @@ -190,11 +190,11 @@ #endif } - void add_card(short from_card_index) { + void add_card(CardIdx_t from_card_index) { add_card_work(from_card_index, /*parallel*/ true); } - void seq_add_card(short from_card_index) { + void seq_add_card(CardIdx_t from_card_index) { add_card_work(from_card_index, /*parallel*/ false); } @@ -604,7 +604,7 @@ // Note that this may be a continued H region. HeapRegion* from_hr = _g1h->heap_region_containing_raw(from); - size_t from_hrs_ind = (size_t)from_hr->hrs_index(); + RegionIdx_t from_hrs_ind = (RegionIdx_t) from_hr->hrs_index(); // If the region is already coarsened, return. 
if (_coarse_map.at(from_hrs_ind)) { @@ -627,11 +627,11 @@ uintptr_t from_hr_bot_card_index = uintptr_t(from_hr->bottom()) >> CardTableModRefBS::card_shift; - int card_index = from_card - from_hr_bot_card_index; + CardIdx_t card_index = from_card - from_hr_bot_card_index; assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion, "Must be in range."); if (G1HRRSUseSparseTable && - _sparse_table.add_card((short) from_hrs_ind, card_index)) { + _sparse_table.add_card(from_hrs_ind, card_index)) { if (G1RecordHRRSOops) { HeapRegionRemSet::record(hr(), from); #if HRRS_VERBOSE @@ -656,9 +656,9 @@ } // Otherwise, transfer from sparse to fine-grain. - short cards[SparsePRTEntry::CardsPerEntry]; + CardIdx_t cards[SparsePRTEntry::CardsPerEntry]; if (G1HRRSUseSparseTable) { - bool res = _sparse_table.get_cards((short) from_hrs_ind, &cards[0]); + bool res = _sparse_table.get_cards(from_hrs_ind, &cards[0]); assert(res, "There should have been an entry"); } @@ -679,13 +679,13 @@ // Add in the cards from the sparse table. if (G1HRRSUseSparseTable) { for (int i = 0; i < SparsePRTEntry::CardsPerEntry; i++) { - short c = cards[i]; + CardIdx_t c = cards[i]; if (c != SparsePRTEntry::NullEntry) { prt->add_card(c); } } // Now we can delete the sparse entry. - bool res = _sparse_table.delete_entry((short) from_hrs_ind); + bool res = _sparse_table.delete_entry(from_hrs_ind); assert(res, "It should have been there."); } } @@ -1030,7 +1030,7 @@ bool OtherRegionsTable::contains_reference_locked(oop* from) const { HeapRegion* hr = _g1h->heap_region_containing_raw(from); if (hr == NULL) return false; - size_t hr_ind = hr->hrs_index(); + RegionIdx_t hr_ind = (RegionIdx_t) hr->hrs_index(); // Is this region in the coarse map? 
if (_coarse_map.at(hr_ind)) return true; @@ -1045,25 +1045,19 @@ uintptr_t hr_bot_card_index = uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift; assert(from_card >= hr_bot_card_index, "Inv"); - int card_index = from_card - hr_bot_card_index; - return _sparse_table.contains_card((short)hr_ind, card_index); + CardIdx_t card_index = from_card - hr_bot_card_index; + assert(0 <= card_index && card_index < PosParPRT::CardsPerRegion, "Must be in range."); + return _sparse_table.contains_card(hr_ind, card_index); } } - -bool HeapRegionRemSet::_par_traversal = false; - -void HeapRegionRemSet::set_par_traversal(bool b) { - assert(_par_traversal != b, "Proper alternation..."); - _par_traversal = b; -} - +// Determines how many threads can add records to an rset in parallel. +// This can be done by either mutator threads together with the +// concurrent refinement threads or GC threads. int HeapRegionRemSet::num_par_rem_sets() { - // We always have at least two, so that a mutator thread can claim an - // id and add to a rem set. - return (int) MAX2(ParallelGCThreads, (size_t)2); + return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads); } HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -177,8 +177,6 @@ G1BlockOffsetSharedArray* _bosa; G1BlockOffsetSharedArray* bosa() const { return _bosa; } - static bool _par_traversal; - OtherRegionsTable _other_regions; // One set bit for every region that has an entry for this one. @@ -211,8 +209,6 @@ HeapRegion* hr); static int num_par_rem_sets(); - static bool par_traversal() { return _par_traversal; } - static void set_par_traversal(bool b); HeapRegion* hr() const { return _other_regions.hr();
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -172,7 +172,7 @@ _n_completed_buffers++; if (!_process_completed && - _n_completed_buffers == _process_completed_threshold) { + _n_completed_buffers >= _process_completed_threshold) { _process_completed = true; if (_notify_when_complete) _cbl_mon->notify_all();
--- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -33,7 +33,7 @@ sprt_iter->init(this); } -void SparsePRTEntry::init(short region_ind) { +void SparsePRTEntry::init(RegionIdx_t region_ind) { _region_ind = region_ind; _next_index = NullEntry; #if UNROLL_CARD_LOOPS @@ -43,11 +43,12 @@ _cards[2] = NullEntry; _cards[3] = NullEntry; #else - for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry; + for (int i = 0; i < CardsPerEntry; i++) + _cards[i] = NullEntry; #endif } -bool SparsePRTEntry::contains_card(short card_index) const { +bool SparsePRTEntry::contains_card(CardIdx_t card_index) const { #if UNROLL_CARD_LOOPS assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); if (_cards[0] == card_index) return true; @@ -80,10 +81,10 @@ return sum; } -SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) { +SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(CardIdx_t card_index) { #if UNROLL_CARD_LOOPS assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll."); - short c = _cards[0]; + CardIdx_t c = _cards[0]; if (c == card_index) return found; if (c == NullEntry) { _cards[0] = card_index; return added; } c = _cards[1]; @@ -97,16 +98,19 @@ if (c == NullEntry) { _cards[3] = card_index; return added; } #else for (int i = 0; i < CardsPerEntry; i++) { - short c = _cards[i]; + CardIdx_t c = _cards[i]; if (c == card_index) return found; - if (c == NullEntry) { _cards[i] = card_index; return added; } + if (c == NullEntry) { + _cards[i] = card_index; + return added; + } } #endif // Otherwise, we're full. return overflow; } -void SparsePRTEntry::copy_cards(short* cards) const { +void SparsePRTEntry::copy_cards(CardIdx_t* cards) const { #if UNROLL_CARD_LOOPS assert(CardsPerEntry == 4, "Assumption. 
If changes, un-unroll."); cards[0] = _cards[0]; @@ -130,7 +134,7 @@ _capacity(capacity), _capacity_mask(capacity-1), _occupied_entries(0), _occupied_cards(0), _entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)), - _buckets(NEW_C_HEAP_ARRAY(short, capacity)), + _buckets(NEW_C_HEAP_ARRAY(int, capacity)), _next_deleted(NULL), _deleted(false), _free_list(NullEntry), _free_region(0) { @@ -143,7 +147,7 @@ _entries = NULL; } if (_buckets != NULL) { - FREE_C_HEAP_ARRAY(short, _buckets); + FREE_C_HEAP_ARRAY(int, _buckets); _buckets = NULL; } } @@ -153,14 +157,18 @@ _occupied_cards = 0; guarantee(_entries != NULL, "INV"); guarantee(_buckets != NULL, "INV"); + + guarantee(_capacity <= ((size_t)1 << (sizeof(int)*BitsPerByte-1)) - 1, + "_capacity too large"); + // This will put -1 == NullEntry in the key field of all entries. memset(_entries, -1, _capacity * sizeof(SparsePRTEntry)); - memset(_buckets, -1, _capacity * sizeof(short)); + memset(_buckets, -1, _capacity * sizeof(int)); _free_list = NullEntry; _free_region = 0; } -bool RSHashTable::add_card(short region_ind, short card_index) { +bool RSHashTable::add_card(RegionIdx_t region_ind, CardIdx_t card_index) { SparsePRTEntry* e = entry_for_region_ind_create(region_ind); assert(e != NULL && e->r_ind() == region_ind, "Postcondition of call above."); @@ -175,9 +183,9 @@ return res != SparsePRTEntry::overflow; } -bool RSHashTable::get_cards(short region_ind, short* cards) { - short ind = (short) (region_ind & capacity_mask()); - short cur_ind = _buckets[ind]; +bool RSHashTable::get_cards(RegionIdx_t region_ind, CardIdx_t* cards) { + int ind = (int) (region_ind & capacity_mask()); + int cur_ind = _buckets[ind]; SparsePRTEntry* cur; while (cur_ind != NullEntry && (cur = entry(cur_ind))->r_ind() != region_ind) { @@ -192,10 +200,10 @@ return true; } -bool RSHashTable::delete_entry(short region_ind) { - short ind = (short) (region_ind & capacity_mask()); - short* prev_loc = &_buckets[ind]; - short cur_ind = *prev_loc; +bool 
RSHashTable::delete_entry(RegionIdx_t region_ind) { + int ind = (int) (region_ind & capacity_mask()); + int* prev_loc = &_buckets[ind]; + int cur_ind = *prev_loc; SparsePRTEntry* cur; while (cur_ind != NullEntry && (cur = entry(cur_ind))->r_ind() != region_ind) { @@ -212,10 +220,11 @@ return true; } -SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const { +SparsePRTEntry* +RSHashTable::entry_for_region_ind(RegionIdx_t region_ind) const { assert(occupied_entries() < capacity(), "Precondition"); - short ind = (short) (region_ind & capacity_mask()); - short cur_ind = _buckets[ind]; + int ind = (int) (region_ind & capacity_mask()); + int cur_ind = _buckets[ind]; SparsePRTEntry* cur; // XXX // int k = 0; @@ -242,15 +251,16 @@ } } -SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) { +SparsePRTEntry* +RSHashTable::entry_for_region_ind_create(RegionIdx_t region_ind) { SparsePRTEntry* res = entry_for_region_ind(region_ind); if (res == NULL) { - short new_ind = alloc_entry(); + int new_ind = alloc_entry(); assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room."); res = entry(new_ind); res->init(region_ind); // Insert at front. 
- short ind = (short) (region_ind & capacity_mask()); + int ind = (int) (region_ind & capacity_mask()); res->set_next_index(_buckets[ind]); _buckets[ind] = new_ind; _occupied_entries++; @@ -258,8 +268,8 @@ return res; } -short RSHashTable::alloc_entry() { - short res; +int RSHashTable::alloc_entry() { + int res; if (_free_list != NullEntry) { res = _free_list; _free_list = entry(res)->next_index(); @@ -273,13 +283,11 @@ } } - -void RSHashTable::free_entry(short fi) { +void RSHashTable::free_entry(int fi) { entry(fi)->set_next_index(_free_list); _free_list = fi; } - void RSHashTable::add_entry(SparsePRTEntry* e) { assert(e->num_valid_cards() > 0, "Precondition."); SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind()); @@ -322,8 +330,8 @@ return NULL; } -short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() { - short res; +CardIdx_t /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() { + CardIdx_t res; while (_bl_ind != RSHashTable::NullEntry) { res = _rsht->entry(_bl_ind)->card(0); if (res != SparsePRTEntry::NullEntry) { @@ -336,7 +344,7 @@ return SparsePRTEntry::NullEntry; } -size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) { +size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(CardIdx_t ci) { return _heap_bot_card_ind + (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion) @@ -345,7 +353,7 @@ bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) { _card_ind++; - short ci; + CardIdx_t ci; if (_card_ind < SparsePRTEntry::CardsPerEntry && ((ci = _rsht->entry(_bl_ind)->card(_card_ind)) != SparsePRTEntry::NullEntry)) { @@ -379,16 +387,16 @@ return false; } -bool RSHashTable::contains_card(short region_index, short card_index) const { +bool RSHashTable::contains_card(RegionIdx_t region_index, CardIdx_t card_index) const { SparsePRTEntry* e = entry_for_region_ind(region_index); return (e != NULL && e->contains_card(card_index)); } size_t RSHashTable::mem_size() const { - return sizeof(this) + 
capacity() * (sizeof(SparsePRTEntry) + sizeof(short)); + return sizeof(this) + + capacity() * (sizeof(SparsePRTEntry) + sizeof(int)); } - // ---------------------------------------------------------------------- SparsePRT* SparsePRT::_head_expanded_list = NULL; @@ -408,6 +416,7 @@ } } + SparsePRT* SparsePRT::get_from_expanded_list() { SparsePRT* hd = _head_expanded_list; while (hd != NULL) { @@ -452,6 +461,7 @@ _next = _cur; } + SparsePRT::~SparsePRT() { assert(_next != NULL && _cur != NULL, "Inv"); if (_cur != _next) { delete _cur; } @@ -465,7 +475,7 @@ return sizeof(this) + _next->mem_size(); } -bool SparsePRT::add_card(short region_id, short card_index) { +bool SparsePRT::add_card(RegionIdx_t region_id, CardIdx_t card_index) { #if SPARSE_PRT_VERBOSE gclog_or_tty->print_cr(" Adding card %d from region %d to region %d sparse.", card_index, region_id, _hr->hrs_index()); @@ -476,11 +486,11 @@ return _next->add_card(region_id, card_index); } -bool SparsePRT::get_cards(short region_id, short* cards) { +bool SparsePRT::get_cards(RegionIdx_t region_id, CardIdx_t* cards) { return _next->get_cards(region_id, cards); } -bool SparsePRT::delete_entry(short region_id) { +bool SparsePRT::delete_entry(RegionIdx_t region_id) { return _next->delete_entry(region_id); }
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -35,32 +35,32 @@ class SparsePRTEntry: public CHeapObj { public: + enum SomePublicConstants { - CardsPerEntry = (short)4, - NullEntry = (short)-1, - DeletedEntry = (short)-2 + CardsPerEntry = 4, + NullEntry = -1 }; private: - short _region_ind; - short _next_index; - short _cards[CardsPerEntry]; + RegionIdx_t _region_ind; + int _next_index; + CardIdx_t _cards[CardsPerEntry]; public: // Set the region_ind to the given value, and delete all cards. - inline void init(short region_ind); + inline void init(RegionIdx_t region_ind); - short r_ind() const { return _region_ind; } + RegionIdx_t r_ind() const { return _region_ind; } bool valid_entry() const { return r_ind() >= 0; } - void set_r_ind(short rind) { _region_ind = rind; } + void set_r_ind(RegionIdx_t rind) { _region_ind = rind; } - short next_index() const { return _next_index; } - short* next_index_addr() { return &_next_index; } - void set_next_index(short ni) { _next_index = ni; } + int next_index() const { return _next_index; } + int* next_index_addr() { return &_next_index; } + void set_next_index(int ni) { _next_index = ni; } // Returns "true" iff the entry contains the given card index. - inline bool contains_card(short card_index) const; + inline bool contains_card(CardIdx_t card_index) const; // Returns the number of non-NULL card entries. inline int num_valid_cards() const; @@ -73,14 +73,14 @@ found, added }; - inline AddCardResult add_card(short card_index); + inline AddCardResult add_card(CardIdx_t card_index); // Copy the current entry's cards into "cards". - inline void copy_cards(short* cards) const; + inline void copy_cards(CardIdx_t* cards) const; // Copy the current entry's cards into the "_card" array of "e." 
inline void copy_cards(SparsePRTEntry* e) const; - inline short card(int i) const { return _cards[i]; } + inline CardIdx_t card(int i) const { return _cards[i]; } }; @@ -98,9 +98,9 @@ size_t _occupied_cards; SparsePRTEntry* _entries; - short* _buckets; - short _free_region; - short _free_list; + int* _buckets; + int _free_region; + int _free_list; static RSHashTable* _head_deleted_list; RSHashTable* _next_deleted; @@ -113,20 +113,20 @@ // operations, and that the the table be less than completely full. If // an entry for "region_ind" is already in the table, finds it and // returns its address; otherwise returns "NULL." - SparsePRTEntry* entry_for_region_ind(short region_ind) const; + SparsePRTEntry* entry_for_region_ind(RegionIdx_t region_ind) const; // Requires that the caller hold a lock preventing parallel modifying // operations, and that the the table be less than completely full. If // an entry for "region_ind" is already in the table, finds it and // returns its address; otherwise allocates, initializes, inserts and // returns a new entry for "region_ind". - SparsePRTEntry* entry_for_region_ind_create(short region_ind); + SparsePRTEntry* entry_for_region_ind_create(RegionIdx_t region_ind); // Returns the index of the next free entry in "_entries". - short alloc_entry(); + int alloc_entry(); // Declares the entry "fi" to be free. (It must have already been // deleted from any bucket lists. - void free_entry(short fi); + void free_entry(int fi); public: RSHashTable(size_t capacity); @@ -138,12 +138,12 @@ // Otherwise, returns "false" to indicate that the addition would // overflow the entry for the region. The caller must transfer these // entries to a larger-capacity representation. 
- bool add_card(short region_id, short card_index); + bool add_card(RegionIdx_t region_id, CardIdx_t card_index); - bool get_cards(short region_id, short* cards); - bool delete_entry(short region_id); + bool get_cards(RegionIdx_t region_id, CardIdx_t* cards); + bool delete_entry(RegionIdx_t region_id); - bool contains_card(short region_id, short card_index) const; + bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const; void add_entry(SparsePRTEntry* e); @@ -162,51 +162,49 @@ static void add_to_deleted_list(RSHashTable* rsht); static RSHashTable* get_from_deleted_list(); - - }; - // ValueObj because will be embedded in HRRS iterator. +// ValueObj because will be embedded in HRRS iterator. class RSHashTableIter VALUE_OBJ_CLASS_SPEC { - short _tbl_ind; - short _bl_ind; - short _card_ind; - RSHashTable* _rsht; - size_t _heap_bot_card_ind; + int _tbl_ind; // [-1, 0.._rsht->_capacity) + int _bl_ind; // [-1, 0.._rsht->_capacity) + short _card_ind; // [0..CardsPerEntry) + RSHashTable* _rsht; + size_t _heap_bot_card_ind; - enum SomePrivateConstants { - CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift - }; + enum SomePrivateConstants { + CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift + }; + + // If the bucket list pointed to by _bl_ind contains a card, sets + // _bl_ind to the index of that entry, and returns the card. + // Otherwise, returns SparseEntry::NullEntry. + CardIdx_t find_first_card_in_list(); - // If the bucket list pointed to by _bl_ind contains a card, sets - // _bl_ind to the index of that entry, and returns the card. - // Otherwise, returns SparseEntry::NullEnty. - short find_first_card_in_list(); - // Computes the proper card index for the card whose offset in the - // current region (as indicated by _bl_ind) is "ci". - // This is subject to errors when there is iteration concurrent with - // modification, but these errors should be benign. 
- size_t compute_card_ind(short ci); + // Computes the proper card index for the card whose offset in the + // current region (as indicated by _bl_ind) is "ci". + // This is subject to errors when there is iteration concurrent with + // modification, but these errors should be benign. + size_t compute_card_ind(CardIdx_t ci); - public: - RSHashTableIter(size_t heap_bot_card_ind) : - _tbl_ind(RSHashTable::NullEntry), - _bl_ind(RSHashTable::NullEntry), - _card_ind((SparsePRTEntry::CardsPerEntry-1)), - _rsht(NULL), - _heap_bot_card_ind(heap_bot_card_ind) - {} +public: + RSHashTableIter(size_t heap_bot_card_ind) : + _tbl_ind(RSHashTable::NullEntry), + _bl_ind(RSHashTable::NullEntry), + _card_ind((SparsePRTEntry::CardsPerEntry-1)), + _rsht(NULL), + _heap_bot_card_ind(heap_bot_card_ind) + {} - void init(RSHashTable* rsht) { - _rsht = rsht; - _tbl_ind = -1; // So that first increment gets to 0. - _bl_ind = RSHashTable::NullEntry; - _card_ind = (SparsePRTEntry::CardsPerEntry-1); - } + void init(RSHashTable* rsht) { + _rsht = rsht; + _tbl_ind = -1; // So that first increment gets to 0. + _bl_ind = RSHashTable::NullEntry; + _card_ind = (SparsePRTEntry::CardsPerEntry-1); + } - bool has_next(size_t& card_index); - - }; + bool has_next(size_t& card_index); +}; // Concurrent accesss to a SparsePRT must be serialized by some external // mutex. @@ -238,7 +236,6 @@ SparsePRT* next_expanded() { return _next_expanded; } void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; } - static SparsePRT* _head_expanded_list; public: @@ -255,16 +252,16 @@ // Otherwise, returns "false" to indicate that the addition would // overflow the entry for the region. The caller must transfer these // entries to a larger-capacity representation. 
- bool add_card(short region_id, short card_index); + bool add_card(RegionIdx_t region_id, CardIdx_t card_index); // If the table hold an entry for "region_ind", Copies its // cards into "cards", which must be an array of length at least // "CardsPerEntry", and returns "true"; otherwise, returns "false". - bool get_cards(short region_ind, short* cards); + bool get_cards(RegionIdx_t region_ind, CardIdx_t* cards); // If there is an entry for "region_ind", removes it and return "true"; // otherwise returns "false." - bool delete_entry(short region_ind); + bool delete_entry(RegionIdx_t region_ind); // Clear the table, and reinitialize to initial capacity. void clear(); @@ -276,13 +273,12 @@ static void cleanup_all(); RSHashTable* cur() const { return _cur; } - void init_iterator(SparsePRTIter* sprt_iter); static void add_to_expanded_list(SparsePRT* sprt); static SparsePRT* get_from_expanded_list(); - bool contains_card(short region_id, short card_index) const { + bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const { return _next->contains_card(region_id, card_index); }
--- a/src/share/vm/gc_implementation/includeDB_gc_g1 Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Wed Jul 01 15:06:54 2009 -0700 @@ -49,6 +49,7 @@ concurrentG1Refine.hpp globalDefinitions.hpp concurrentG1Refine.hpp allocation.hpp +concurrentG1Refine.hpp thread.hpp concurrentG1RefineThread.cpp concurrentG1Refine.hpp concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp @@ -280,6 +281,7 @@ heapRegionRemSet.cpp allocation.hpp heapRegionRemSet.cpp bitMap.inline.hpp +heapRegionRemSet.cpp concurrentG1Refine.hpp heapRegionRemSet.cpp g1BlockOffsetTable.inline.hpp heapRegionRemSet.cpp g1CollectedHeap.inline.hpp heapRegionRemSet.cpp heapRegionRemSet.hpp @@ -331,6 +333,7 @@ sparsePRT.hpp allocation.hpp sparsePRT.hpp cardTableModRefBS.hpp sparsePRT.hpp globalDefinitions.hpp +sparsePRT.hpp g1CollectedHeap.inline.hpp sparsePRT.hpp heapRegion.hpp sparsePRT.hpp mutex.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -177,6 +177,7 @@ // are double-word aligned in 32-bit VMs, but not in 64-bit VMs, so the 32-bit // granularity is 2, 64-bit is 1. static inline size_t obj_granularity() { return size_t(MinObjAlignment); } + static inline int obj_granularity_shift() { return LogMinObjAlignment; } HeapWord* _region_start; size_t _region_size; @@ -299,13 +300,13 @@ inline size_t ParMarkBitMap::bits_to_words(idx_t bits) { - return bits * obj_granularity(); + return bits << obj_granularity_shift(); } inline ParMarkBitMap::idx_t ParMarkBitMap::words_to_bits(size_t words) { - return words / obj_granularity(); + return words >> obj_granularity_shift(); } inline size_t ParMarkBitMap::obj_size(idx_t beg_bit, idx_t end_bit) const
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -27,13 +27,12 @@ # include "incls/_precompiled.incl" # include "incls/_concurrentGCThread.cpp.incl" -bool ConcurrentGCThread::_should_terminate = false; -bool ConcurrentGCThread::_has_terminated = false; int ConcurrentGCThread::_CGC_flag = CGC_nil; SuspendibleThreadSet ConcurrentGCThread::_sts; -ConcurrentGCThread::ConcurrentGCThread() { +ConcurrentGCThread::ConcurrentGCThread() : + _should_terminate(false), _has_terminated(false) { _sts.initialize(); };
--- a/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -72,8 +72,8 @@ friend class VMStructs; protected: - static bool _should_terminate; - static bool _has_terminated; + bool _should_terminate; + bool _has_terminated; enum CGC_flag_type { CGC_nil = 0x0,
--- a/src/share/vm/includeDB_compiler1 Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/includeDB_compiler1 Wed Jul 01 15:06:54 2009 -0700 @@ -387,7 +387,7 @@ c1_ValueSet.cpp c1_ValueSet.hpp c1_ValueSet.hpp allocation.hpp -c1_ValueSet.hpp bitMap.hpp +c1_ValueSet.hpp bitMap.inline.hpp c1_ValueSet.hpp c1_Instruction.hpp c1_ValueStack.cpp c1_IR.hpp
--- a/src/share/vm/memory/cardTableRS.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/memory/cardTableRS.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -33,12 +33,8 @@ { #ifndef SERIALGC if (UseG1GC) { - if (G1RSBarrierUseQueue) { _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap, max_covered_regions); - } else { - _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions); - } } else { _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions); }
--- a/src/share/vm/memory/gcLocker.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/memory/gcLocker.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -242,6 +242,31 @@ #endif }; +// A SkipGCALot object is used to elide the usual effect of gc-a-lot +// over a section of execution by a thread. Currently, it's used only to +// prevent re-entrant calls to GC. +class SkipGCALot : public StackObj { + private: + bool _saved; + Thread* _t; + + public: +#ifdef ASSERT + SkipGCALot(Thread* t) : _t(t) { + _saved = _t->skip_gcalot(); + _t->set_skip_gcalot(true); + } + + ~SkipGCALot() { + assert(_t->skip_gcalot(), "Save-restore protocol invariant"); + _t->set_skip_gcalot(_saved); + } +#else + SkipGCALot(Thread* t) { } + ~SkipGCALot() { } +#endif +}; + // JRT_LEAF currently can be called from either _thread_in_Java or // _thread_in_native mode. In _thread_in_native, it is ok // for another thread to trigger GC. The rest of the JRT_LEAF
--- a/src/share/vm/runtime/interfaceSupport.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/runtime/interfaceSupport.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -66,11 +66,14 @@ void InterfaceSupport::gc_alot() { Thread *thread = Thread::current(); - if (thread->is_VM_thread()) return; // Avoid concurrent calls + if (!thread->is_Java_thread()) return; // Avoid concurrent calls // Check for new, not quite initialized thread. A thread in new mode cannot initiate a GC. JavaThread *current_thread = (JavaThread *)thread; if (current_thread->active_handles() == NULL) return; + // Short-circuit any possible re-entrant gc-a-lot attempt + if (thread->skip_gcalot()) return; + if (is_init_completed()) { if (++_fullgc_alot_invocation < FullGCALotStart) {
--- a/src/share/vm/runtime/mutexLocker.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/runtime/mutexLocker.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -70,7 +70,6 @@ Monitor* CMark_lock = NULL; Monitor* ZF_mon = NULL; Monitor* Cleanup_mon = NULL; -Monitor* G1ConcRefine_mon = NULL; Mutex* SATB_Q_FL_lock = NULL; Monitor* SATB_Q_CBL_mon = NULL; Mutex* Shared_SATB_Q_lock = NULL; @@ -168,7 +167,6 @@ def(CMark_lock , Monitor, nonleaf, true ); // coordinate concurrent mark thread def(ZF_mon , Monitor, leaf, true ); def(Cleanup_mon , Monitor, nonleaf, true ); - def(G1ConcRefine_mon , Monitor, nonleaf, true ); def(SATB_Q_FL_lock , Mutex , special, true ); def(SATB_Q_CBL_mon , Monitor, nonleaf, true ); def(Shared_SATB_Q_lock , Mutex, nonleaf, true );
--- a/src/share/vm/runtime/mutexLocker.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/runtime/mutexLocker.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -63,9 +63,6 @@ extern Monitor* CMark_lock; // used for concurrent mark thread coordination extern Monitor* ZF_mon; // used for G1 conc zero-fill. extern Monitor* Cleanup_mon; // used for G1 conc cleanup. -extern Monitor* G1ConcRefine_mon; // used for G1 conc-refine - // coordination. - extern Mutex* SATB_Q_FL_lock; // Protects SATB Q // buffer free list. extern Monitor* SATB_Q_CBL_mon; // Protects SATB Q
--- a/src/share/vm/runtime/thread.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/runtime/thread.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -127,6 +127,7 @@ debug_only(_owned_locks = NULL;) debug_only(_allow_allocation_count = 0;) NOT_PRODUCT(_allow_safepoint_count = 0;) + NOT_PRODUCT(_skip_gcalot = false;) CHECK_UNHANDLED_OOPS_ONLY(_gc_locked_out_count = 0;) _jvmti_env_iteration_count = 0; _vm_operation_started_count = 0; @@ -784,7 +785,6 @@ // We could enter a safepoint here and thus have a gc InterfaceSupport::check_gc_alot(); } - #endif } #endif
--- a/src/share/vm/runtime/thread.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/runtime/thread.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -191,6 +191,9 @@ NOT_PRODUCT(int _allow_safepoint_count;) // If 0, thread allow a safepoint to happen debug_only (int _allow_allocation_count;) // If 0, the thread is allowed to allocate oops. + // Used by SkipGCALot class. + NOT_PRODUCT(bool _skip_gcalot;) // Should we elide gc-a-lot? + // Record when GC is locked out via the GC_locker mechanism CHECK_UNHANDLED_OOPS_ONLY(int _gc_locked_out_count;) @@ -308,6 +311,11 @@ bool is_gc_locked_out() { return _gc_locked_out_count > 0; } #endif // CHECK_UNHANDLED_OOPS +#ifndef PRODUCT + bool skip_gcalot() { return _skip_gcalot; } + void set_skip_gcalot(bool v) { _skip_gcalot = v; } +#endif + public: // Installs a pending exception to be inserted later static void send_async_exception(oop thread_oop, oop java_throwable);
--- a/src/share/vm/runtime/vmThread.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/runtime/vmThread.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -531,6 +531,7 @@ Thread* t = Thread::current(); if (!t->is_VM_thread()) { + SkipGCALot sgcalot(t); // avoid re-entrant attempts to gc-a-lot // JavaThread or WatcherThread t->check_for_valid_safepoint_state(true);
--- a/src/share/vm/utilities/bitMap.cpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/utilities/bitMap.cpp Wed Jul 01 15:06:54 2009 -0700 @@ -41,19 +41,6 @@ resize(size_in_bits, in_resource_area); } - -void BitMap::verify_index(idx_t index) const { - assert(index < _size, "BitMap index out of bounds"); -} - -void BitMap::verify_range(idx_t beg_index, idx_t end_index) const { -#ifdef ASSERT - assert(beg_index <= end_index, "BitMap range error"); - // Note that [0,0) and [size,size) are both valid ranges. - if (end_index != _size) verify_index(end_index); -#endif -} - void BitMap::resize(idx_t size_in_bits, bool in_resource_area) { assert(size_in_bits >= 0, "just checking"); idx_t old_size_in_words = size_in_words();
--- a/src/share/vm/utilities/bitMap.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/utilities/bitMap.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -93,10 +93,12 @@ // The index of the first full word in a range. idx_t word_index_round_up(idx_t bit) const; - // Verification, statistics. - void verify_index(idx_t index) const; - void verify_range(idx_t beg_index, idx_t end_index) const; + // Verification. + inline void verify_index(idx_t index) const NOT_DEBUG_RETURN; + inline void verify_range(idx_t beg_index, idx_t end_index) const + NOT_DEBUG_RETURN; + // Statistics. static idx_t* _pop_count_table; static void init_pop_count_table(); static idx_t num_set_bits(bm_word_t w); @@ -287,7 +289,6 @@ #endif }; - // Convenience class wrapping BitMap which provides multiple bits per slot. class BitMap2D VALUE_OBJ_CLASS_SPEC { public:
--- a/src/share/vm/utilities/bitMap.inline.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/utilities/bitMap.inline.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -22,6 +22,17 @@ * */ +#ifdef ASSERT +inline void BitMap::verify_index(idx_t index) const { + assert(index < _size, "BitMap index out of bounds"); +} + +inline void BitMap::verify_range(idx_t beg_index, idx_t end_index) const { + assert(beg_index <= end_index, "BitMap range error"); + // Note that [0,0) and [size,size) are both valid ranges. + if (end_index != _size) verify_index(end_index); +} +#endif // #ifdef ASSERT inline void BitMap::set_bit(idx_t bit) { verify_index(bit);
--- a/src/share/vm/utilities/macros.hpp Wed Jul 01 12:22:23 2009 -0700 +++ b/src/share/vm/utilities/macros.hpp Wed Jul 01 15:06:54 2009 -0700 @@ -106,11 +106,13 @@ #ifdef ASSERT #define DEBUG_ONLY(code) code #define NOT_DEBUG(code) +#define NOT_DEBUG_RETURN /*next token must be ;*/ // Historical. #define debug_only(code) code #else // ASSERT #define DEBUG_ONLY(code) #define NOT_DEBUG(code) code +#define NOT_DEBUG_RETURN {} #define debug_only(code) #endif // ASSERT