# HG changeset patch # User tonyp # Date 1236612777 14400 # Node ID bcedf688d8822e651c3d85bbb0ce19afe3011224 # Parent 3db67f76d308233bf701cffdc3d9a44a0b0d63ef# Parent 7ea5ca260b28009116fc349ed677cad861296f80 Merge diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -133,14 +133,12 @@ _co_tracker.update(false); if (G1SmoothConcRefine) { - start_vtime_sec = os::elapsedVTime(); prev_buffer_num = curr_buffer_num; - _sts.leave(); os::sleep(Thread::current(), (jlong) _interval_ms, false); _sts.join(); + start_vtime_sec = os::elapsedVTime(); } - n_logs++; } // Make sure we harvest the PYA, if any. diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -420,6 +420,10 @@ _has_overflown(false), _concurrent(false), + _has_aborted(false), + _restart_for_overflow(false), + _concurrent_marking_in_progress(false), + _should_gray_objects(false), // _verbose_level set below diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -107,7 +107,7 @@ if (PrintGC) { gclog_or_tty->date_stamp(PrintGCDateStamps); gclog_or_tty->stamp(PrintGCTimeStamps); - tty->print_cr("[GC concurrent-mark-start]"); + gclog_or_tty->print_cr("[GC concurrent-mark-start]"); } if (!g1_policy->in_young_gc_mode()) { @@ -320,8 +320,6 @@ set_in_progress(); clear_started(); if (TraceConcurrentMark) 
gclog_or_tty->print_cr("CM-starting"); - - return; } // Note: this method, although exported by the ConcurrentMarkSweepThread, diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -78,8 +78,8 @@ void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, int max_completed_queue, - Mutex* lock) { - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + Mutex* lock, PtrQueueSet* fl_owner) { + PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner); set_buffer_size(DCQBarrierQueueBufferSize); set_process_completed_threshold(DCQBarrierProcessCompletedThreshold); diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -88,7 +88,7 @@ void initialize(Monitor* cbl_mon, Mutex* fl_lock, int max_completed_queue = 0, - Mutex* lock = NULL); + Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL); // The number of parallel ids that can be claimed to allow collector or // mutator threads to do card-processing work. 
diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -136,6 +136,14 @@ int calls() { return _calls; } }; +class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure { +public: + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + return true; + } +}; + YoungList::YoungList(G1CollectedHeap* g1h) : _g1h(g1h), _head(NULL), _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL), @@ -961,7 +969,8 @@ // dirtied, so this should abandon those logs, and set "do_traversal" // to true. concurrent_g1_refine()->set_pya_restart(); - + assert(!G1DeferredRSUpdate + || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any"); assert(regions_accounted_for(), "Region leakage!"); } @@ -1466,6 +1475,13 @@ G1DirtyCardQueueMax, Shared_DirtyCardQ_lock); } + if (G1DeferredRSUpdate) { + dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + 0, + Shared_DirtyCardQ_lock, + &JavaThread::dirty_card_queue_set()); + } // In case we're keeping closure specialization stats, initialize those // counts and that mechanism. 
SpecializationStats::clear(); @@ -2316,7 +2332,6 @@ void G1CollectedHeap::checkConcurrentMark() { VerifyMarkedObjsClosure verifycl(this); - doConcurrentMark(); // MutexLockerEx x(getMarkBitMapLock(), // Mutex::_no_safepoint_check_flag); object_iterate(&verifycl); @@ -2493,7 +2508,7 @@ guarantee(_in_cset_fast_test == NULL, "invariant"); guarantee(_in_cset_fast_test_base == NULL, "invariant"); - _in_cset_fast_test_length = n_regions(); + _in_cset_fast_test_length = max_regions(); _in_cset_fast_test_base = NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length); memset(_in_cset_fast_test_base, false, @@ -2918,27 +2933,51 @@ } }; -class RecreateRSetEntriesClosure: public OopClosure { +class UpdateRSetImmediate : public OopsInHeapRegionClosure { private: G1CollectedHeap* _g1; G1RemSet* _g1_rem_set; - HeapRegion* _from; public: - RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) : - _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from) - {} + UpdateRSetImmediate(G1CollectedHeap* g1) : + _g1(g1), _g1_rem_set(g1->g1_rem_set()) {} void do_oop(narrowOop* p) { guarantee(false, "NYI"); } void do_oop(oop* p) { assert(_from->is_in_reserved(p), "paranoia"); - if (*p != NULL) { - _g1_rem_set->write_ref(_from, p); + if (*p != NULL && !_from->is_survivor()) { + _g1_rem_set->par_write_ref(_from, p, 0); } } }; +class UpdateRSetDeferred : public OopsInHeapRegionClosure { +private: + G1CollectedHeap* _g1; + DirtyCardQueue *_dcq; + CardTableModRefBS* _ct_bs; + +public: + UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : + _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + assert(_from->is_in_reserved(p), "paranoia"); + if (!_from->is_in_reserved(*p) && !_from->is_survivor()) { + size_t card_index = _ct_bs->index_for(p); + if (_ct_bs->mark_card_deferred(card_index)) { + _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); + } + } + } +}; + + + class 
RemoveSelfPointerClosure: public ObjectClosure { private: G1CollectedHeap* _g1; @@ -2946,11 +2985,11 @@ HeapRegion* _hr; size_t _prev_marked_bytes; size_t _next_marked_bytes; + OopsInHeapRegionClosure *_cl; public: - RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) : - _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr), - _prev_marked_bytes(0), _next_marked_bytes(0) - {} + RemoveSelfPointerClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* cl) : + _g1(g1), _cm(_g1->concurrent_mark()), _prev_marked_bytes(0), + _next_marked_bytes(0), _cl(cl) {} size_t prev_marked_bytes() { return _prev_marked_bytes; } size_t next_marked_bytes() { return _next_marked_bytes; } @@ -2988,8 +3027,7 @@ // that, if evacuation fails, we might have remembered set // entries missing given that we skipped cards on the // collection set. So, we'll recreate such entries now. - RecreateRSetEntriesClosure cl(_g1, _hr); - obj->oop_iterate(&cl); + obj->oop_iterate(_cl); assert(_cm->isPrevMarked(obj), "Should be marked!"); } else { // The object has been either evacuated or is dead. 
Fill it with a @@ -3002,14 +3040,23 @@ }; void G1CollectedHeap::remove_self_forwarding_pointers() { + UpdateRSetImmediate immediate_update(_g1h); + DirtyCardQueue dcq(&_g1h->dirty_card_queue_set()); + UpdateRSetDeferred deferred_update(_g1h, &dcq); + OopsInHeapRegionClosure *cl; + if (G1DeferredRSUpdate) { + cl = &deferred_update; + } else { + cl = &immediate_update; + } HeapRegion* cur = g1_policy()->collection_set(); - while (cur != NULL) { assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + RemoveSelfPointerClosure rspc(_g1h, cl); if (cur->evacuation_failed()) { - RemoveSelfPointerClosure rspc(_g1h, cur); assert(cur->in_collection_set(), "bad CS"); + cl->set_region(cur); cur->object_iterate(&rspc); // A number of manipulations to make the TAMS be the current top, @@ -3518,6 +3565,9 @@ protected: G1CollectedHeap* _g1h; RefToScanQueue* _refs; + DirtyCardQueue _dcq; + CardTableModRefBS* _ct_bs; + G1RemSet* _g1_rem; typedef GrowableArray OverflowQueue; OverflowQueue* _overflowed_refs; @@ -3559,10 +3609,32 @@ void add_to_undo_waste(size_t waste) { _undo_waste += waste; } + DirtyCardQueue& dirty_card_queue() { return _dcq; } + CardTableModRefBS* ctbs() { return _ct_bs; } + + void immediate_rs_update(HeapRegion* from, oop* p, int tid) { + _g1_rem->par_write_ref(from, p, tid); + } + + void deferred_rs_update(HeapRegion* from, oop* p, int tid) { + // If the new value of the field points to the same region or + // is the to-space, we don't need to include it in the Rset updates. + if (!from->is_in_reserved(*p) && !from->is_survivor()) { + size_t card_index = ctbs()->index_for(p); + // If the card hasn't been added to the buffer, do it. 
+ if (ctbs()->mark_card_deferred(card_index)) { + dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index)); + } + } + } + public: G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) : _g1h(g1h), _refs(g1h->task_queue(queue_num)), + _dcq(&g1h->dirty_card_queue_set()), + _ct_bs((CardTableModRefBS*)_g1h->barrier_set()), + _g1_rem(g1h->g1_rem_set()), _hash_seed(17), _queue_num(queue_num), _term_attempts(0), _age_table(false), @@ -3640,6 +3712,14 @@ int refs_to_scan() { return refs()->size(); } int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + void update_rs(HeapRegion* from, oop* p, int tid) { + if (G1DeferredRSUpdate) { + deferred_rs_update(from, p, tid); + } else { + immediate_rs_update(from, p, tid); + } + } + HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) { HeapWord* obj = NULL; @@ -3808,7 +3888,6 @@ } }; - G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), _par_scan_state(par_scan_state) { } @@ -3834,7 +3913,7 @@ assert(obj == *p, "the value of *p should not have changed"); _par_scan_state->push_on_queue(p); } else { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } } @@ -3972,13 +4051,13 @@ } // When scanning the RS, we only care about objs in CS. if (barrier == G1BarrierRS) { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } // When scanning moved objs, must look at all oops. 
if (barrier == G1BarrierEvac && obj != NULL) { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } if (do_gen_barrier && obj != NULL) { @@ -4127,6 +4206,7 @@ G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); + G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); G1ParScanAndMarkHeapRSClosure scan_mark_heap_rs_cl(_g1h, &pss); @@ -4382,7 +4462,6 @@ g1_rem_set()->prepare_for_oops_into_collection_set_do(); concurrent_g1_refine()->set_use_cache(false); int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); - set_par_threads(n_workers); G1ParTask g1_par_task(this, n_workers, _task_queues); @@ -4390,8 +4469,9 @@ change_strong_roots_parity(); // In preparation for parallel strong roots. rem_set()->prepare_for_younger_refs_iterate(true); + + assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty"); double start_par = os::elapsedTime(); - if (ParallelGCThreads > 0) { // The individual threads will set their evac-failure closures. 
workers()->run_task(&g1_par_task); @@ -4411,8 +4491,8 @@ G1KeepAliveClosure keep_alive(this); JNIHandles::weak_oops_do(&is_alive, &keep_alive); } - g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(true); finalize_for_evac_failure(); @@ -4423,7 +4503,6 @@ if (evacuation_failed()) { remove_self_forwarding_pointers(); - if (PrintGCDetails) { gclog_or_tty->print(" (evacuation failed)"); } else if (PrintGC) { @@ -4431,6 +4510,14 @@ } } + if (G1DeferredRSUpdate) { + RedirtyLoggedCardTableEntryFastClosure redirty; + dirty_card_queue_set().set_closure(&redirty); + dirty_card_queue_set().apply_closure_to_all_completed_buffers(); + JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set()); + assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); + } + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -457,6 +457,10 @@ // And it's mod ref barrier set, used to track updates for the above. ModRefBarrierSet* _mr_bs; + // A set of cards that cover the objects for which the Rsets should be updated + // concurrently after the collection. + DirtyCardQueueSet _dirty_card_queue_set; + // The Heap Region Rem Set Iterator. HeapRegionRemSetIterator** _rem_set_iterator; @@ -666,6 +670,9 @@ RefToScanQueue *task_queue(int i); + // A set of cards where updates happened during the GC + DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } + // Create a G1CollectedHeap with the specified policy. // Must call the initialize method afterwards. // May not return if something goes wrong. 
diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -177,11 +177,19 @@ _cards_scanned(NULL), _total_cards_scanned(0) { _seq_task = new SubTasksDone(NumSeqTasks); - _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); + guarantee(n_workers() > 0, "There should be some workers"); + _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, n_workers()); + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); + } } HRInto_G1RemSet::~HRInto_G1RemSet() { delete _seq_task; + for (uint i = 0; i < n_workers(); i++) { + delete _new_refs[i]; + } + FREE_C_HEAP_ARRAY(GrowableArray*, _new_refs); } void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { @@ -281,8 +289,9 @@ if (!_ct_bs->is_card_claimed(card_index) && !_ct_bs->is_card_dirty(card_index)) { assert(_ct_bs->is_card_clean(card_index) || - _ct_bs->is_card_claimed(card_index), - "Card is either dirty, clean, or claimed"); + _ct_bs->is_card_claimed(card_index) || + _ct_bs->is_card_deferred(card_index), + "Card is either clean, claimed or deferred"); if (_ct_bs->claim_card(card_index)) scanCard(card_index, card_region); } @@ -338,14 +347,12 @@ _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); - if (ParallelGCThreads > 0) { - // In this case, we called scanNewRefsRS and recorded the corresponding - // time. 
- double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); - if (scan_new_refs_time_ms > 0.0) { - closure_app_time_ms += scan_new_refs_time_ms; - } + + double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); + if (scan_new_refs_time_ms > 0.0) { + closure_app_time_ms += scan_new_refs_time_ms; } + _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); } @@ -469,8 +476,8 @@ double scan_new_refs_start_sec = os::elapsedTime(); G1CollectedHeap* g1h = G1CollectedHeap::heap(); CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set()); - while (_new_refs[worker_i]->is_nonempty()) { - oop* p = _new_refs[worker_i]->pop(); + for (int i = 0; i < _new_refs[worker_i]->length(); i++) { + oop* p = _new_refs[worker_i]->at(i); oop obj = *p; // *p was in the collection set when p was pushed on "_new_refs", but // another thread may have processed this location from an RS, so it @@ -480,10 +487,6 @@ HeapRegion* r = g1h->heap_region_containing(p); DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj)); - assert(ParallelGCThreads > 1 - || to->rem_set()->contains_reference(p), - "Invariant: pushed after being added." - "(Not reliable in parallel code.)"); oc->set_region(r); // If "p" has already been processed concurrently, this is // idempotent. 
@@ -538,8 +541,8 @@ } } else { assert(worker_i == 0, "invariant"); - updateRS(0); + scanNewRefsRS(oc, 0); scanRS(oc, 0); } } @@ -559,11 +562,7 @@ assert(!_par_traversal_in_progress, "Invariant between iterations."); if (ParallelGCThreads > 0) { set_par_traversal(true); - int n_workers = _g1->workers()->total_workers(); - _seq_task->set_par_threads(n_workers); - for (uint i = 0; i < ParallelGCThreads; i++) - _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); - + _seq_task->set_par_threads((int)n_workers()); if (cg1r->do_traversal()) { updateRS(0); // Have to do this again after updaters @@ -587,6 +586,53 @@ } }; +class UpdateRSetOopsIntoCSImmediate : public OopClosure { + G1CollectedHeap* _g1; +public: + UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + HeapRegion* to = _g1->heap_region_containing(*p); + if (to->in_collection_set()) { + if (to->rem_set()->add_reference(p, 0)) { + _g1->schedule_popular_region_evac(to); + } + } + } +}; + +class UpdateRSetOopsIntoCSDeferred : public OopClosure { + G1CollectedHeap* _g1; + CardTableModRefBS* _ct_bs; + DirtyCardQueue* _dcq; +public: + UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : + _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + if (_g1->obj_in_cs(obj)) { + size_t card_index = _ct_bs->index_for(p); + if (_ct_bs->mark_card_deferred(card_index)) { + _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); + } + } + } +}; + +void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) { + for (size_t i = 0; i < n_workers(); i++) { + for (int j = 0; j < _new_refs[i]->length(); j++) { + oop* p = _new_refs[i]->at(j); + cl->do_oop(p); + } + } +} + void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { guarantee( 
_cards_scanned != NULL, "invariant" ); _total_cards_scanned = 0; @@ -609,11 +655,25 @@ if (cg1r->do_traversal()) { cg1r->cg1rThread()->set_do_traversal(false); } - for (uint i = 0; i < ParallelGCThreads; i++) { - delete _new_refs[i]; - } set_par_traversal(false); } + + if (_g1->evacuation_failed()) { + // Restore remembered sets for the regions pointing into + // the collection set. + if (G1DeferredRSUpdate) { + DirtyCardQueue dcq(&_g1->dirty_card_queue_set()); + UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq); + new_refs_iterate(&deferred_update); + } else { + UpdateRSetOopsIntoCSImmediate immediate_update(_g1); + new_refs_iterate(&immediate_update); + } + } + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i]->clear(); + } + assert(!_par_traversal_in_progress, "Invariant between iterations."); } @@ -683,7 +743,8 @@ bool doHeapRegion(HeapRegion* r) { if (!r->in_collection_set() && !r->continuesHumongous() && - !r->is_young()) { + !r->is_young() && + !r->is_survivor()) { _update_rs_oop_cl.set_from(r); UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); @@ -820,7 +881,7 @@ // before all the cards on the region are dirtied. This is unlikely, // and it doesn't happen often, but it can happen. So, the extra // check below filters out those cards. 
- if (r->is_young()) { + if (r->is_young() || r->is_survivor()) { return; } // While we are processing RSet buffers during the collection, we diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -155,6 +155,7 @@ bool _par_traversal_in_progress; void set_par_traversal(bool b); GrowableArray** _new_refs; + void new_refs_iterate(OopClosure* cl); public: // This is called to reset dual hash tables after the gc pause diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -31,24 +31,7 @@ } inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) { - oop obj = *p; - assert(from != NULL && from->is_in_reserved(p), - "p is not in a from"); - HeapRegion* to = _g1->heap_region_containing(obj); - if (from != to && to != NULL) { - if (!to->popular() && !from->is_survivor()) { -#if G1_REM_SET_LOGGING - gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" - " for region [" PTR_FORMAT ", " PTR_FORMAT ")", - p, obj, - to->bottom(), to->end()); -#endif - assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); - if (to->rem_set()->add_reference(p)) { - _g1->schedule_popular_region_evac(to); - } - } - } + par_write_ref(from, p, 0); } inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) { @@ -82,7 +65,22 @@ HeapRegion* to = _g1->heap_region_containing(obj); // The test below could be optimized by applying a bit op to to and from. 
if (to != NULL && from != NULL && from != to) { - if (!to->popular() && !from->is_survivor()) { + bool update_delayed = false; + // There is a tricky infinite loop if we keep pushing + // self forwarding pointers onto our _new_refs list. + // The _par_traversal_in_progress flag is true during the collection pause, + // false during the evacuation failure handing. + if (_par_traversal_in_progress && + to->in_collection_set() && !self_forwarded(obj)) { + _new_refs[tid]->push(p); + // Deferred updates to the Cset are either discarded (in the normal case), + // or processed (if an evacuation failure occurs) at the end + // of the collection. + // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do(). + update_delayed = true; + } + + if (!to->popular() && !update_delayed) { #if G1_REM_SET_LOGGING gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" " for region [" PTR_FORMAT ", " PTR_FORMAT ")", @@ -94,11 +92,5 @@ _g1->schedule_popular_region_evac(to); } } - // There is a tricky infinite loop if we keep pushing - // self forwarding pointers onto our _new_refs list. 
- if (_par_traversal_in_progress && - to->in_collection_set() && !self_forwarded(obj)) { - _new_refs[tid]->push(p); - } } } diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -172,6 +172,9 @@ develop(bool, G1RSBarrierUseQueue, true, \ "If true, use queueing RS barrier") \ \ + develop(bool, G1DeferredRSUpdate, true, \ + "If true, use deferred RS updates") \ + \ develop(bool, G1RSLogCheckCardTable, false, \ "If true, verify that no dirty cards remain after RS log " \ "processing.") \ diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/ptrQueue.cpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -91,15 +91,17 @@ _n_completed_buffers(0), _process_completed_threshold(0), _process_completed(false), _buf_free_list(NULL), _buf_free_list_sz(0) -{} +{ + _fl_owner = this; +} void** PtrQueueSet::allocate_buffer() { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - if (_buf_free_list != NULL) { - void** res = _buf_free_list; - _buf_free_list = (void**)_buf_free_list[0]; - _buf_free_list_sz--; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + if (_fl_owner->_buf_free_list != NULL) { + void** res = _fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; + _fl_owner->_buf_free_list_sz--; // Just override the next pointer with NULL, just in case we scan this part // of the buffer. 
res[0] = NULL; @@ -111,10 +113,10 @@ void PtrQueueSet::deallocate_buffer(void** buf) { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - buf[0] = (void*)_buf_free_list; - _buf_free_list = buf; - _buf_free_list_sz++; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + buf[0] = (void*)_fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = buf; + _fl_owner->_buf_free_list_sz++; } void PtrQueueSet::reduce_free_list() { @@ -207,3 +209,58 @@ void PtrQueueSet::set_process_completed_threshold(size_t sz) { _process_completed_threshold = sz; } + +// Merge lists of buffers. Notify waiting threads if the length of the list +// exceeds threshold. The source queue is emptied as a result. The queues +// must share the monitor. +void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { + assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } else { + assert(_completed_buffers_head != NULL, "Well formedness"); + if (src->_completed_buffers_head != NULL) { + _completed_buffers_tail->next = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } + } + _n_completed_buffers += src->_n_completed_buffers; + + src->_n_completed_buffers = 0; + src->_completed_buffers_head = NULL; + src->_completed_buffers_tail = NULL; + + assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || + _completed_buffers_head != NULL && _completed_buffers_tail != NULL, + "Sanity"); + + if (!_process_completed && + _n_completed_buffers >= _process_completed_threshold) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify_all(); + } +} + +// Merge free lists of 
the two queues. The free list of the source +// queue is emptied as a result. The queues must share the same +// mutex that guards free lists. +void PtrQueueSet::merge_freelists(PtrQueueSet* src) { + assert(_fl_lock == src->_fl_lock, "Should share the same lock"); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + if (_buf_free_list != NULL) { + void **p = _buf_free_list; + while (*p != NULL) { + p = (void**)*p; + } + *p = src->_buf_free_list; + } else { + _buf_free_list = src->_buf_free_list; + } + _buf_free_list_sz += src->_buf_free_list_sz; + src->_buf_free_list = NULL; + src->_buf_free_list_sz = 0; +} diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/ptrQueue.hpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -155,6 +155,9 @@ Mutex* _fl_lock; void** _buf_free_list; size_t _buf_free_list_sz; + // Queue set can share a freelist. The _fl_owner variable + // specifies the owner. It is set to "this" by default. + PtrQueueSet* _fl_owner; // The size of all buffers in the set. size_t _sz; @@ -188,10 +191,13 @@ // Because of init-order concerns, we can't pass these as constructor // arguments. void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0) { + int max_completed_queue = 0, + PtrQueueSet *fl_owner = NULL) { _max_completed_queue = max_completed_queue; assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); - _cbl_mon = cbl_mon; _fl_lock = fl_lock; + _cbl_mon = cbl_mon; + _fl_lock = fl_lock; + _fl_owner = (fl_owner != NULL) ? fl_owner : this; } // Return an empty oop array of size _sz (required to be non-zero). 
@@ -228,4 +234,7 @@ void reduce_free_list(); size_t completed_buffers_num() { return _n_completed_buffers; } + + void merge_bufferlists(PtrQueueSet* src); + void merge_freelists(PtrQueueSet* src); }; diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/sparsePRT.cpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -504,6 +504,7 @@ // Make sure that the current and next tables agree. (Another mechanism // takes care of deleting now-unused tables.) _cur = _next; + set_expanded(false); } void SparsePRT::expand() { diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/g1/sparsePRT.hpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -274,7 +274,7 @@ // Clean up all tables on the expanded list. Called single threaded. static void cleanup_all(); - RSHashTable* next() const { return _next; } + RSHashTable* cur() const { return _cur; } void init_iterator(SparsePRTIter* sprt_iter); @@ -300,7 +300,7 @@ {} void init(const SparsePRT* sprt) { - RSHashTableIter::init(sprt->next()); + RSHashTableIter::init(sprt->cur()); } bool has_next(size_t& card_index) { return RSHashTableIter::has_next(card_index); diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -125,6 +125,8 @@ perm_gen->verify_object_start_array(); } + heap->pre_full_gc_dump(); + // Filled in below to track the state of the young gen after the collection. 
bool eden_empty; bool survivors_empty; @@ -363,6 +365,8 @@ Universe::print_heap_after_gc(); } + heap->post_full_gc_dump(); + #ifdef TRACESPINNING ParallelTaskTerminator::print_termination_counts(); #endif diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -1982,6 +1982,8 @@ heap->record_gen_tops_before_GC(); } + heap->pre_full_gc_dump(); + _print_phases = PrintGCDetails && PrintParallelOldGCPhaseTimes; // Make sure data structures are sane, make the heap parsable, and do other @@ -2204,6 +2206,8 @@ gc_task_manager()->print_task_time_stamps(); } + heap->post_full_gc_dump(); + #ifdef TRACESPINNING ParallelTaskTerminator::print_termination_counts(); #endif diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/shared/vmGCOperations.cpp --- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -121,7 +121,7 @@ // make the heap parsable (no need to retire TLABs) ch->ensure_parsability(false); } - HeapInspection::heap_inspection(_out); + HeapInspection::heap_inspection(_out, _need_prologue /* need_prologue */); } diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_implementation/shared/vmGCOperations.hpp --- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -112,13 +112,16 @@ private: outputStream* _out; bool _full_gc; + bool _need_prologue; public: - VM_GC_HeapInspection(outputStream* out, bool request_full_gc) : + VM_GC_HeapInspection(outputStream* out, bool request_full_gc, + bool need_prologue) : VM_GC_Operation(0 /* 
total collections, dummy, ignored */, 0 /* total full collections, dummy, ignored */, request_full_gc) { _out = out; _full_gc = request_full_gc; + _need_prologue = need_prologue; } ~VM_GC_HeapInspection() {} diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -294,3 +294,29 @@ ThreadLocalAllocBuffer::resize_all_tlabs(); } } + +void CollectedHeap::pre_full_gc_dump() { + if (HeapDumpBeforeFullGC) { + TraceTime tt("Heap Dump: ", PrintGCDetails, false, gclog_or_tty); + // We are doing a "major" collection and a heap dump before + // major collection has been requested. + HeapDumper::dump_heap(); + } + if (PrintClassHistogramBeforeFullGC) { + TraceTime tt("Class Histogram: ", PrintGCDetails, true, gclog_or_tty); + VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */); + inspector.doit(); + } +} + +void CollectedHeap::post_full_gc_dump() { + if (HeapDumpAfterFullGC) { + TraceTime tt("Heap Dump", PrintGCDetails, false, gclog_or_tty); + HeapDumper::dump_heap(); + } + if (PrintClassHistogramAfterFullGC) { + TraceTime tt("Class Histogram", PrintGCDetails, true, gclog_or_tty); + VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */); + inspector.doit(); + } +} diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -514,6 +514,10 @@ // Perform any cleanup actions necessary before allowing a verification. 
virtual void prepare_for_verify() = 0; + // Generate any dumps preceding or following a full gc + void pre_full_gc_dump(); + void post_full_gc_dump(); + virtual void print() const = 0; virtual void print_on(outputStream* st) const = 0; diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/includeDB_gc --- a/src/share/vm/includeDB_gc Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/includeDB_gc Mon Mar 09 11:32:57 2009 -0400 @@ -26,10 +26,12 @@ collectedHeap.cpp collectedHeap.hpp collectedHeap.cpp collectedHeap.inline.hpp +collectedHeap.cpp heapDumper.hpp collectedHeap.cpp init.hpp collectedHeap.cpp oop.inline.hpp collectedHeap.cpp systemDictionary.hpp collectedHeap.cpp thread_.inline.hpp +collectedHeap.cpp vmGCOperations.hpp collectedHeap.hpp allocation.hpp collectedHeap.hpp barrierSet.hpp diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/memory/cardTableModRefBS.cpp --- a/src/share/vm/memory/cardTableModRefBS.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -356,18 +356,62 @@ inline_write_ref_field(field, newVal); } +/* + Claimed and deferred bits are used together in G1 during the evacuation + pause. These bits can have the following state transitions: + 1. The claimed bit can be put over any other card state. Except that + the "dirty -> dirty and claimed" transition is checked for in + G1 code and is not used. + 2. Deferred bit can be set only if the previous state of the card + was either clean or claimed. mark_card_deferred() is wait-free. + We do not care whether the operation is successful because if + it is not, it will only result in a duplicate entry in the update + buffer because of the "cache-miss". So it's not worth spinning.
+ */ + bool CardTableModRefBS::claim_card(size_t card_index) { jbyte val = _byte_map[card_index]; - if (val != claimed_card_val()) { - jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val); - if (res == val) + assert(val != dirty_card_val(), "Shouldn't claim a dirty card"); + while (val == clean_card_val() || + (val & (clean_card_mask_val() | claimed_card_val())) != claimed_card_val()) { + jbyte new_val = val; + if (val == clean_card_val()) { + new_val = (jbyte)claimed_card_val(); + } else { + new_val = val | (jbyte)claimed_card_val(); + } + jbyte res = Atomic::cmpxchg(new_val, &_byte_map[card_index], val); + if (res == val) { return true; - else return false; + } + val = res; } return false; } +bool CardTableModRefBS::mark_card_deferred(size_t card_index) { + jbyte val = _byte_map[card_index]; + // It's already processed + if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) { + return false; + } + // Cached bit can be installed either on a clean card or on a claimed card. + jbyte new_val = val; + if (val == clean_card_val()) { + new_val = (jbyte)deferred_card_val(); + } else { + if (val & claimed_card_val()) { + new_val = val | (jbyte)deferred_card_val(); + } + } + if (new_val != val) { + Atomic::cmpxchg(new_val, &_byte_map[card_index], val); + } + return true; +} + + void CardTableModRefBS::non_clean_card_iterate(Space* sp, MemRegion mr, DirtyCardToOopClosure* dcto_cl, diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/memory/cardTableModRefBS.hpp --- a/src/share/vm/memory/cardTableModRefBS.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -52,11 +52,15 @@ enum CardValues { clean_card = -1, + // The mask contains zeros in places for all other values. 
+ clean_card_mask = clean_card - 31, + dirty_card = 0, precleaned_card = 1, - claimed_card = 3, - last_card = 4, - CT_MR_BS_last_reserved = 10 + claimed_card = 2, + deferred_card = 4, + last_card = 8, + CT_MR_BS_last_reserved = 16 }; // dirty and precleaned are equivalent wrt younger_refs_iter. @@ -254,9 +258,11 @@ }; static int clean_card_val() { return clean_card; } + static int clean_card_mask_val() { return clean_card_mask; } static int dirty_card_val() { return dirty_card; } static int claimed_card_val() { return claimed_card; } static int precleaned_card_val() { return precleaned_card; } + static int deferred_card_val() { return deferred_card; } // For RTTI simulation. bool is_a(BarrierSet::Name bsn) { @@ -329,7 +335,8 @@ } bool is_card_claimed(size_t card_index) { - return _byte_map[card_index] == claimed_card_val(); + jbyte val = _byte_map[card_index]; + return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val(); } bool claim_card(size_t card_index); @@ -338,6 +345,13 @@ return _byte_map[card_index] == clean_card_val(); } + bool is_card_deferred(size_t card_index) { + jbyte val = _byte_map[card_index]; + return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val(); + } + + bool mark_card_deferred(size_t card_index); + // Card marking array base (adjusted for heap low boundary) // This would be the 0th element of _byte_map, if the heap started at 0x0. 
// But since the heap starts at some higher address, this points to somewhere @@ -434,6 +448,10 @@ return byte_for(p) - _byte_map; } + const jbyte* byte_for_index(const size_t card_index) const { + return _byte_map + card_index; + } + void verify(); void verify_guard(); diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/memory/genCollectedHeap.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -456,6 +456,9 @@ int max_level_collected = starting_level; for (int i = starting_level; i <= max_level; i++) { if (_gens[i]->should_collect(full, size, is_tlab)) { + if (i == n_gens() - 1) { // a major collection is to happen + pre_full_gc_dump(); // do any pre full gc dumps + } // Timer for individual generations. Last argument is false: no CR TraceTime t1(_gens[i]->short_name(), PrintGCDetails, false, gclog_or_tty); TraceCollectorStats tcs(_gens[i]->counters()); @@ -573,6 +576,10 @@ // a whole heap collection. complete = complete || (max_level_collected == n_gens() - 1); + if (complete) { // We did a "major" collection + post_full_gc_dump(); // do any post full gc dumps + } + if (PrintGCDetails) { print_heap_change(gch_prev_used); diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/memory/heapInspection.cpp --- a/src/share/vm/memory/heapInspection.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/memory/heapInspection.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -233,7 +233,7 @@ size_t missed_count() { return _missed_count; } }; -void HeapInspection::heap_inspection(outputStream* st) { +void HeapInspection::heap_inspection(outputStream* st, bool need_prologue) { ResourceMark rm; HeapWord* ref; @@ -244,7 +244,9 @@ case CollectedHeap::GenCollectedHeap: { is_shared_heap = true; SharedHeap* sh = (SharedHeap*)heap; - sh->gc_prologue(false /* !full */); // get any necessary locks, etc. 
+ if (need_prologue) { + sh->gc_prologue(false /* !full */); // get any necessary locks, etc. + } ref = sh->perm_gen()->used_region().start(); break; } @@ -290,7 +292,7 @@ } st->flush(); - if (is_shared_heap) { + if (need_prologue && is_shared_heap) { SharedHeap* sh = (SharedHeap*)heap; sh->gc_epilogue(false /* !full */); // release all acquired locks, etc. } diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/memory/heapInspection.hpp --- a/src/share/vm/memory/heapInspection.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/memory/heapInspection.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -127,6 +127,6 @@ class HeapInspection : public AllStatic { public: - static void heap_inspection(outputStream* st) KERNEL_RETURN; + static void heap_inspection(outputStream* st, bool need_prologue) KERNEL_RETURN; static void find_instances_at_safepoint(klassOop k, GrowableArray* result) KERNEL_RETURN; }; diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/runtime/globals.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -662,6 +662,12 @@ product(ccstrlist, OnOutOfMemoryError, "", \ "Run user-defined commands on first java.lang.OutOfMemoryError") \ \ + manageable(bool, HeapDumpBeforeFullGC, false, \ + "Dump heap to file before any major stop-world GC") \ + \ + manageable(bool, HeapDumpAfterFullGC, false, \ + "Dump heap to file after any major stop-world GC") \ + \ manageable(bool, HeapDumpOnOutOfMemoryError, false, \ "Dump heap to file when java.lang.OutOfMemoryError is thrown") \ \ @@ -1971,6 +1977,12 @@ product(bool, PrintHeapAtSIGBREAK, true, \ "Print heap layout in response to SIGBREAK") \ \ + manageable(bool, PrintClassHistogramBeforeFullGC, false, \ + "Print a class histogram before any major stop-world GC") \ + \ + manageable(bool, PrintClassHistogramAfterFullGC, false, \ + "Print a class histogram after any major stop-world GC") \ + \ manageable(bool, PrintClassHistogram, false, \ 
"Print a histogram of class instances") \ \ diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/runtime/os.cpp --- a/src/share/vm/runtime/os.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/runtime/os.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -207,7 +207,8 @@ VMThread::execute(&op1); Universe::print_heap_at_SIGBREAK(); if (PrintClassHistogram) { - VM_GC_HeapInspection op1(gclog_or_tty, true /* force full GC before heap inspection */); + VM_GC_HeapInspection op1(gclog_or_tty, true /* force full GC before heap inspection */, + true /* need_prologue */); VMThread::execute(&op1); } if (JvmtiExport::should_post_data_dump()) { diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/services/attachListener.cpp --- a/src/share/vm/services/attachListener.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/services/attachListener.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -194,7 +194,7 @@ } live_objects_only = strcmp(arg0, "-live") == 0; } - VM_GC_HeapInspection heapop(out, live_objects_only /* request gc */); + VM_GC_HeapInspection heapop(out, live_objects_only /* request full gc */, true /* need_prologue */); VMThread::execute(&heapop); return JNI_OK; } diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/services/heapDumper.cpp --- a/src/share/vm/services/heapDumper.cpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/services/heapDumper.cpp Mon Mar 09 11:32:57 2009 -0400 @@ -347,7 +347,6 @@ INITIAL_CLASS_COUNT = 200 }; - // Supports I/O operations on a dump file class DumpWriter : public StackObj { @@ -1303,7 +1302,9 @@ // The VM operation that performs the heap dump class VM_HeapDumper : public VM_GC_Operation { private: - DumpWriter* _writer; + static VM_HeapDumper* _global_dumper; + static DumpWriter* _global_writer; + DumpWriter* _local_writer; bool _gc_before_heap_dump; bool _is_segmented_dump; jlong _dump_start; @@ -1311,8 +1312,20 @@ ThreadStackTrace** _stack_traces; int _num_threads; - // accessors - DumpWriter* writer() const { return _writer; } + // accessors and setters + 
static VM_HeapDumper* dumper() { assert(_global_dumper != NULL, "Error"); return _global_dumper; } + static DumpWriter* writer() { assert(_global_writer != NULL, "Error"); return _global_writer; } + void set_global_dumper() { + assert(_global_dumper == NULL, "Error"); + _global_dumper = this; + } + void set_global_writer() { + assert(_global_writer == NULL, "Error"); + _global_writer = _local_writer; + } + void clear_global_dumper() { _global_dumper = NULL; } + void clear_global_writer() { _global_writer = NULL; } + bool is_segmented_dump() const { return _is_segmented_dump; } void set_segmented_dump() { _is_segmented_dump = true; } jlong dump_start() const { return _dump_start; } @@ -1357,7 +1370,7 @@ VM_GC_Operation(0 /* total collections, dummy, ignored */, 0 /* total full collections, dummy, ignored */, gc_before_heap_dump) { - _writer = writer; + _local_writer = writer; _gc_before_heap_dump = gc_before_heap_dump; _is_segmented_dump = false; _dump_start = (jlong)-1; @@ -1381,6 +1394,9 @@ void doit(); }; +VM_HeapDumper* VM_HeapDumper::_global_dumper = NULL; +DumpWriter* VM_HeapDumper::_global_writer = NULL; + bool VM_HeapDumper::skip_operation() const { return false; } @@ -1479,31 +1495,28 @@ void VM_HeapDumper::do_load_class(klassOop k) { static u4 class_serial_num = 0; - VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation()); - DumpWriter* writer = dumper->writer(); - // len of HPROF_LOAD_CLASS record u4 remaining = 2*oopSize + 2*sizeof(u4); // write a HPROF_LOAD_CLASS for the class and each array class do { - DumperSupport::write_header(writer, HPROF_LOAD_CLASS, remaining); + DumperSupport::write_header(writer(), HPROF_LOAD_CLASS, remaining); // class serial number is just a number - writer->write_u4(++class_serial_num); + writer()->write_u4(++class_serial_num); // class ID Klass* klass = Klass::cast(k); - writer->write_classID(klass); + writer()->write_classID(klass); // add the klassOop and class serial number pair - 
dumper->add_class_serial_number(klass, class_serial_num); + dumper()->add_class_serial_number(klass, class_serial_num); - writer->write_u4(STACK_TRACE_ID); + writer()->write_u4(STACK_TRACE_ID); // class name ID symbolOop name = klass->name(); - writer->write_objectID(name); + writer()->write_objectID(name); // write a LOAD_CLASS record for the array type (if it exists) k = klass->array_klass_or_null(); @@ -1512,17 +1525,13 @@ // writes a HPROF_GC_CLASS_DUMP record for the given class void VM_HeapDumper::do_class_dump(klassOop k) { - VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation()); - DumpWriter* writer = dumper->writer(); - DumperSupport::dump_class_and_array_classes(writer, k); + DumperSupport::dump_class_and_array_classes(writer(), k); } // writes a HPROF_GC_CLASS_DUMP records for a given basic type // array (and each multi-dimensional array too) void VM_HeapDumper::do_basic_type_array_class_dump(klassOop k) { - VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation()); - DumpWriter* writer = dumper->writer(); - DumperSupport::dump_basic_type_array_class(writer, k); + DumperSupport::dump_basic_type_array_class(writer(), k); } // Walk the stack of the given thread. @@ -1658,6 +1667,11 @@ ch->ensure_parsability(false); } + // At this point we should be the only dumper active, so + // the following should be safe. + set_global_dumper(); + set_global_writer(); + // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1 size_t used = ch->used(); const char* header; @@ -1667,6 +1681,7 @@ } else { header = "JAVA PROFILE 1.0.1"; } + // header is few bytes long - no chance to overflow int writer()->write_raw((void*)header, (int)strlen(header)); writer()->write_u1(0); // terminator @@ -1723,6 +1738,10 @@ // fixes up the length of the dump record. In the case of a segmented // heap then the HPROF_HEAP_DUMP_END record is also written. end_of_dump(); + + // Now we clear the global variables, so that a future dumper might run. 
+ clear_global_dumper(); + clear_global_writer(); } void VM_HeapDumper::dump_stack_traces() { @@ -1790,7 +1809,12 @@ // generate the dump VM_HeapDumper dumper(&writer, _gc_before_heap_dump); - VMThread::execute(&dumper); + if (Thread::current()->is_VM_thread()) { + assert(SafepointSynchronize::is_at_safepoint(), "Expected to be called at a safepoint"); + dumper.doit(); + } else { + VMThread::execute(&dumper); + } // close dump file and record any error that the writer may have encountered writer.close(); @@ -1845,49 +1869,68 @@ } } - -// Called by error reporting +// Called by error reporting by a single Java thread outside of a JVM safepoint, +// or by heap dumping by the VM thread during a (GC) safepoint. Thus, these various +// callers are strictly serialized and guaranteed not to interfere below. For more +// general use, however, this method will need modification to prevent +// interference when updating the static variables base_path and dump_file_seq below. void HeapDumper::dump_heap() { - static char path[JVM_MAXPATHLEN]; + static char base_path[JVM_MAXPATHLEN] = {'\0'}; + static uint dump_file_seq = 0; + char my_path[JVM_MAXPATHLEN] = {'\0'}; // The dump file defaults to java_pid.hprof in the current working // directory. HeapDumpPath= can be used to specify an alternative // dump file name or a directory where dump file is created.
- bool use_default_filename = true; - if (HeapDumpPath == NULL || HeapDumpPath[0] == '\0') { - path[0] = '\0'; // HeapDumpPath= not specified - } else { - assert(strlen(HeapDumpPath) < sizeof(path), "HeapDumpPath too long"); - strcpy(path, HeapDumpPath); - // check if the path is a directory (must exist) - DIR* dir = os::opendir(path); - if (dir == NULL) { - use_default_filename = false; + if (dump_file_seq == 0) { // first time in, we initialize base_path + bool use_default_filename = true; + if (HeapDumpPath == NULL || HeapDumpPath[0] == '\0') { + // HeapDumpPath= not specified } else { - // HeapDumpPath specified a directory. We append a file separator - // (if needed). - os::closedir(dir); - size_t fs_len = strlen(os::file_separator()); - if (strlen(path) >= fs_len) { - char* end = path; - end += (strlen(path) - fs_len); - if (strcmp(end, os::file_separator()) != 0) { - assert(strlen(path) + strlen(os::file_separator()) < sizeof(path), - "HeapDumpPath too long"); - strcat(path, os::file_separator()); + assert(strlen(HeapDumpPath) < sizeof(base_path), "HeapDumpPath too long"); + strcpy(base_path, HeapDumpPath); + // check if the path is a directory (must exist) + DIR* dir = os::opendir(base_path); + if (dir == NULL) { + use_default_filename = false; + } else { + // HeapDumpPath specified a directory. We append a file separator + // (if needed). 
+ os::closedir(dir); + size_t fs_len = strlen(os::file_separator()); + if (strlen(base_path) >= fs_len) { + char* end = base_path; + end += (strlen(base_path) - fs_len); + if (strcmp(end, os::file_separator()) != 0) { + assert(strlen(base_path) + strlen(os::file_separator()) < sizeof(base_path), + "HeapDumpPath too long"); + strcat(base_path, os::file_separator()); + } } } } + // If HeapDumpPath wasn't a file name then we append the default name + if (use_default_filename) { + char fn[32]; + sprintf(fn, "java_pid%d", os::current_process_id()); + assert(strlen(base_path) + strlen(fn) < sizeof(base_path), "HeapDumpPath too long"); + strcat(base_path, fn); + } + assert(strlen(base_path) < sizeof(my_path), "Buffer too small"); + strcpy(my_path, base_path); + } else { + // Append a sequence number id for dumps following the first + char fn[33]; + sprintf(fn, ".%d", dump_file_seq); + assert(strlen(base_path) + strlen(fn) < sizeof(my_path), "HeapDumpPath too long"); + strcpy(my_path, base_path); + strcat(my_path, fn); } - // If HeapDumpPath wasn't a file name then we append the default name - if (use_default_filename) { - char fn[32]; - sprintf(fn, "java_pid%d.hprof", os::current_process_id()); - assert(strlen(path) + strlen(fn) < sizeof(path), "HeapDumpPath too long"); - strcat(path, fn); - } + dump_file_seq++; // increment seq number for next time we dump + assert(strlen(".hprof") + strlen(my_path) < sizeof(my_path), "HeapDumpPath too long"); + strcat(my_path, ".hprof"); HeapDumper dumper(false /* no GC before heap dump */, true /* send to tty */); - dumper.dump(path); + dumper.dump(my_path); } diff -r 3db67f76d308 -r bcedf688d882 src/share/vm/services/heapDumper.hpp --- a/src/share/vm/services/heapDumper.hpp Thu Mar 05 22:07:29 2009 -0500 +++ b/src/share/vm/services/heapDumper.hpp Mon Mar 09 11:32:57 2009 -0400 @@ -53,7 +53,7 @@ public: HeapDumper(bool gc_before_heap_dump) : - _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(false) { } + 
_gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(false) { } HeapDumper(bool gc_before_heap_dump, bool print_to_tty) : _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(print_to_tty) { }