# HG changeset patch # User ysr # Date 1303352370 25200 # Node ID c48ad6ab8bdfd9f7f084d986d02b246526f57244 # Parent 49a67202bc671d23d719c9e14752d80295a9e8f6 7037276: Unnecessary double traversal of dirty card windows Summary: Short-circuited an unnecessary double traversal of dirty card windows when iterating younger refs. Also renamed some cardtable methods for more clarity. Reviewed-by: jmasa, stefank, poonam diff -r 49a67202bc67 -r c48ad6ab8bdf src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp --- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Tue Apr 19 15:46:59 2011 -0400 +++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Wed Apr 20 19:19:30 2011 -0700 @@ -33,44 +33,43 @@ #include "runtime/mutexLocker.hpp" #include "runtime/virtualspace.hpp" -void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, - int n_threads) { - if (n_threads > 0) { - assert((n_threads == 1 && ParallelGCThreads == 0) || - n_threads <= (int)ParallelGCThreads, - "# worker threads != # requested!"); - // Make sure the LNC array is valid for the space. - jbyte** lowest_non_clean; - uintptr_t lowest_non_clean_base_chunk_index; - size_t lowest_non_clean_chunk_size; - get_LNC_array_for_space(sp, lowest_non_clean, - lowest_non_clean_base_chunk_index, - lowest_non_clean_chunk_size); +void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl, + int n_threads) { + assert(n_threads > 0, "Error: expected n_threads > 0"); + assert((n_threads == 1 && ParallelGCThreads == 0) || + n_threads <= (int)ParallelGCThreads, + "# worker threads != # requested!"); + // Make sure the LNC array is valid for the space. + jbyte** lowest_non_clean; + uintptr_t lowest_non_clean_base_chunk_index; + size_t lowest_non_clean_chunk_size; + get_LNC_array_for_space(sp, lowest_non_clean, + lowest_non_clean_base_chunk_index, + lowest_non_clean_chunk_size); - int n_strides = n_threads * StridesPerThread; - SequentialSubTasksDone* pst = sp->par_seq_tasks(); - pst->set_n_threads(n_threads); - pst->set_n_tasks(n_strides); + int n_strides = n_threads * StridesPerThread; + SequentialSubTasksDone* pst = sp->par_seq_tasks(); + pst->set_n_threads(n_threads); + pst->set_n_tasks(n_strides); - int stride = 0; - while (!pst->is_task_claimed(/* reference */ stride)) { - process_stride(sp, mr, stride, n_strides, dcto_cl, cl, - lowest_non_clean, - lowest_non_clean_base_chunk_index, - lowest_non_clean_chunk_size); - } - if (pst->all_tasks_completed()) { - // Clear lowest_non_clean array for next time. - intptr_t first_chunk_index = addr_to_chunk_index(mr.start()); - uintptr_t last_chunk_index = addr_to_chunk_index(mr.last()); - for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) { - intptr_t ind = ch - lowest_non_clean_base_chunk_index; - assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size, - "Bounds error"); - lowest_non_clean[ind] = NULL; - } + int stride = 0; + while (!pst->is_task_claimed(/* reference */ stride)) { + process_stride(sp, mr, stride, n_strides, dcto_cl, cl, + lowest_non_clean, + lowest_non_clean_base_chunk_index, + lowest_non_clean_chunk_size); + } + if (pst->all_tasks_completed()) { + // Clear lowest_non_clean array for next time. + intptr_t first_chunk_index = addr_to_chunk_index(mr.start()); + uintptr_t last_chunk_index = addr_to_chunk_index(mr.last()); + for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) { + intptr_t ind = ch - lowest_non_clean_base_chunk_index; + assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size, + "Bounds error"); + lowest_non_clean[ind] = NULL; } } } @@ -81,7 +80,7 @@ MemRegion used, jint stride, int n_strides, DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, + ClearNoncleanCardWrapper* cl, jbyte** lowest_non_clean, uintptr_t lowest_non_clean_base_chunk_index, size_t lowest_non_clean_chunk_size) { @@ -127,7 +126,11 @@ lowest_non_clean_base_chunk_index, lowest_non_clean_chunk_size); - non_clean_card_iterate_work(chunk_mr, cl); + // We do not call the non_clean_card_iterate_serial() version because + // we want to clear the cards, and the ClearNoncleanCardWrapper closure + // itself does the work of finding contiguous dirty ranges of cards to + // process (and clear). + cl->do_MemRegion(chunk_mr); // Find the next chunk of the stride. chunk_card_start += CardsPerStrideChunk * n_strides; diff -r 49a67202bc67 -r c48ad6ab8bdf src/share/vm/memory/cardTableModRefBS.cpp --- a/src/share/vm/memory/cardTableModRefBS.cpp Tue Apr 19 15:46:59 2011 -0400 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Wed Apr 20 19:19:30 2011 -0700 @@ -456,31 +456,35 @@ } -void CardTableModRefBS::non_clean_card_iterate(Space* sp, - MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl) { +void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp, + MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl) { if (!mr.is_empty()) { int n_threads = SharedHeap::heap()->n_par_threads(); if (n_threads > 0) { #ifndef SERIALGC - par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, n_threads); + non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads); #else // SERIALGC fatal("Parallel gc not supported here."); #endif // SERIALGC } else { - non_clean_card_iterate_work(mr, cl); + // We do not call the non_clean_card_iterate_serial() version below because + // we want to clear the cards (which non_clean_card_iterate_serial() does not + // do for us), and the ClearNoncleanCardWrapper closure itself does the work + // of finding contiguous dirty ranges of cards to process (and clear). + cl->do_MemRegion(mr); } } } -// NOTE: For this to work correctly, it is important that -// we look for non-clean cards below (so as to catch those -// marked precleaned), rather than look explicitly for dirty -// cards (and miss those marked precleaned). In that sense, -// the name precleaned is currently somewhat of a misnomer. -void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr, - MemRegionClosure* cl) { +// The iterator itself is not MT-aware, but +// MT-aware callers and closures can use this to +// accomplish dirty card iteration in parallel. The +// iterator itself does not clear the dirty cards, or +// change their values in any manner. +void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr, + MemRegionClosure* cl) { for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (mri.word_size() > 0) { @@ -661,7 +665,7 @@ void CardTableModRefBS::verify_clean_region(MemRegion mr) { GuaranteeNotModClosure blk(this); - non_clean_card_iterate_work(mr, &blk); + non_clean_card_iterate_serial(mr, &blk); } // To verify a MemRegion is entirely dirty this closure is passed to diff -r 49a67202bc67 -r c48ad6ab8bdf src/share/vm/memory/cardTableModRefBS.hpp --- a/src/share/vm/memory/cardTableModRefBS.hpp Tue Apr 19 15:46:59 2011 -0400 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Wed Apr 20 19:19:30 2011 -0700 @@ -44,6 +44,7 @@ class Generation; class OopsInGenClosure; class DirtyCardToOopClosure; +class ClearNoncleanCardWrapper; class CardTableModRefBS: public ModRefBarrierSet { // Some classes get to look at some private stuff. @@ -165,22 +166,28 @@ // Iterate over the portion of the card-table which covers the given // region mr in the given space and apply cl to any dirty sub-regions - // of mr. cl and dcto_cl must either be the same closure or cl must - // wrap dcto_cl. Both are required - neither may be NULL. Also, dcto_cl - // may be modified. Note that this function will operate in a parallel - // mode if worker threads are available. - void non_clean_card_iterate(Space* sp, MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl); + // of mr. Dirty cards are _not_ cleared by the iterator method itself, + // but closures may arrange to do so on their own should they so wish. + void non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl); - // Utility function used to implement the other versions below. - void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl); + // A variant of the above that will operate in a parallel mode if + // worker threads are available, and clear the dirty cards as it + // processes them. + // ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl, + // which may itself be modified by the method. + void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl); - void par_non_clean_card_iterate_work(Space* sp, MemRegion mr, - DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, - int n_threads); + private: + // Work method used to implement non_clean_card_iterate_possibly_parallel() + // above in the parallel case. + void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, + DirtyCardToOopClosure* dcto_cl, + ClearNoncleanCardWrapper* cl, + int n_threads); + protected: // Dirty the bytes corresponding to "mr" (not all of which must be // covered.) void dirty_MemRegion(MemRegion mr); @@ -237,7 +244,7 @@ MemRegion used, jint stride, int n_strides, DirtyCardToOopClosure* dcto_cl, - MemRegionClosure* cl, + ClearNoncleanCardWrapper* cl, jbyte** lowest_non_clean, uintptr_t lowest_non_clean_base_chunk_index, size_t lowest_non_clean_chunk_size); @@ -409,14 +416,14 @@ // marking, where a dirty card may cause scanning, and summarization // marking, of objects that extend onto subsequent cards.) void mod_card_iterate(MemRegionClosure* cl) { - non_clean_card_iterate_work(_whole_heap, cl); + non_clean_card_iterate_serial(_whole_heap, cl); } // Like the "mod_cards_iterate" above, except only invokes the closure // for cards within the MemRegion "mr" (which is required to be // card-aligned and sized.) void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) { - non_clean_card_iterate_work(mr, cl); + non_clean_card_iterate_serial(mr, cl); } static uintx ct_max_alignment_constraint(); @@ -493,4 +500,5 @@ void set_CTRS(CardTableRS* rs) { _rs = rs; } }; + #endif // SHARE_VM_MEMORY_CARDTABLEMODREFBS_HPP diff -r 49a67202bc67 -r c48ad6ab8bdf src/share/vm/memory/cardTableRS.cpp --- a/src/share/vm/memory/cardTableRS.cpp Tue Apr 19 15:46:59 2011 -0400 +++ b/src/share/vm/memory/cardTableRS.cpp Wed Apr 20 19:19:30 2011 -0700 @@ -105,107 +105,111 @@ g->younger_refs_iterate(blk); } -class ClearNoncleanCardWrapper: public MemRegionClosure { - MemRegionClosure* _dirty_card_closure; - CardTableRS* _ct; - bool _is_par; -private: - // Clears the given card, return true if the corresponding card should be - // processed. - bool clear_card(jbyte* entry) { - if (_is_par) { - while (true) { - // In the parallel case, we may have to do this several times. - jbyte entry_val = *entry; - assert(entry_val != CardTableRS::clean_card_val(), - "We shouldn't be looking at clean cards, and this should " - "be the only place they get cleaned."); - if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val) - || _ct->is_prev_youngergen_card_val(entry_val)) { - jbyte res = - Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val); - if (res == entry_val) { - break; - } else { - assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card, - "The CAS above should only fail if another thread did " - "a GC write barrier."); - } - } else if (entry_val == - CardTableRS::cur_youngergen_and_prev_nonclean_card) { - // Parallelism shouldn't matter in this case. Only the thread - // assigned to scan the card should change this value. - *entry = _ct->cur_youngergen_card_val(); - break; - } else { - assert(entry_val == _ct->cur_youngergen_card_val(), - "Should be the only possibility."); - // In this case, the card was clean before, and become - // cur_youngergen only because of processing of a promoted object. - // We don't have to look at the card. - return false; - } +inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) { + if (_is_par) { + return clear_card_parallel(entry); + } else { + return clear_card_serial(entry); + } +} + +inline bool ClearNoncleanCardWrapper::clear_card_parallel(jbyte* entry) { + while (true) { + // In the parallel case, we may have to do this several times. + jbyte entry_val = *entry; + assert(entry_val != CardTableRS::clean_card_val(), + "We shouldn't be looking at clean cards, and this should " + "be the only place they get cleaned."); + if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val) + || _ct->is_prev_youngergen_card_val(entry_val)) { + jbyte res = + Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val); + if (res == entry_val) { + break; + } else { + assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card, + "The CAS above should only fail if another thread did " + "a GC write barrier."); } - return true; + } else if (entry_val == + CardTableRS::cur_youngergen_and_prev_nonclean_card) { + // Parallelism shouldn't matter in this case. Only the thread + // assigned to scan the card should change this value. + *entry = _ct->cur_youngergen_card_val(); + break; } else { - jbyte entry_val = *entry; - assert(entry_val != CardTableRS::clean_card_val(), - "We shouldn't be looking at clean cards, and this should " - "be the only place they get cleaned."); - assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card, - "This should be possible in the sequential case."); - *entry = CardTableRS::clean_card_val(); - return true; + assert(entry_val == _ct->cur_youngergen_card_val(), + "Should be the only possibility."); + // In this case, the card was clean before, and become + // cur_youngergen only because of processing of a promoted object. + // We don't have to look at the card. + return false; } } + return true; +} -public: - ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, - CardTableRS* ct) : + +inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) { + jbyte entry_val = *entry; + assert(entry_val != CardTableRS::clean_card_val(), + "We shouldn't be looking at clean cards, and this should " + "be the only place they get cleaned."); + assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card, + "This should be possible in the sequential case."); + *entry = CardTableRS::clean_card_val(); + return true; +} + +ClearNoncleanCardWrapper::ClearNoncleanCardWrapper( + MemRegionClosure* dirty_card_closure, CardTableRS* ct) : _dirty_card_closure(dirty_card_closure), _ct(ct) { _is_par = (SharedHeap::heap()->n_par_threads() > 0); +} + +void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) { + assert(mr.word_size() > 0, "Error"); + assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned"); + // mr.end() may not necessarily be card aligned. + jbyte* cur_entry = _ct->byte_for(mr.last()); + const jbyte* limit = _ct->byte_for(mr.start()); + HeapWord* end_of_non_clean = mr.end(); + HeapWord* start_of_non_clean = end_of_non_clean; + while (cur_entry >= limit) { + HeapWord* cur_hw = _ct->addr_for(cur_entry); + if ((*cur_entry != CardTableRS::clean_card_val()) && clear_card(cur_entry)) { + // Continue the dirty range by opening the + // dirty window one card to the left. + start_of_non_clean = cur_hw; + } else { + // We hit a "clean" card; process any non-empty + // "dirty" range accumulated so far. + if (start_of_non_clean < end_of_non_clean) { + const MemRegion mrd(start_of_non_clean, end_of_non_clean); + _dirty_card_closure->do_MemRegion(mrd); + } + // Reset the dirty window, while continuing to look + // for the next dirty card that will start a + // new dirty window. + end_of_non_clean = cur_hw; + start_of_non_clean = cur_hw; + } + // Note that "cur_entry" leads "start_of_non_clean" in + // its leftward excursion after this point + // in the loop and, when we hit the left end of "mr", + // will point off of the left end of the card-table + // for "mr". + cur_entry--; } - void do_MemRegion(MemRegion mr) { - // We start at the high end of "mr", walking backwards - // while accumulating a contiguous dirty range of cards in - // [start_of_non_clean, end_of_non_clean) which we then - // process en masse. - HeapWord* end_of_non_clean = mr.end(); - HeapWord* start_of_non_clean = end_of_non_clean; - jbyte* entry = _ct->byte_for(mr.last()); - const jbyte* first_entry = _ct->byte_for(mr.start()); - while (entry >= first_entry) { - HeapWord* cur = _ct->addr_for(entry); - if (!clear_card(entry)) { - // We hit a clean card; process any non-empty - // dirty range accumulated so far. - if (start_of_non_clean < end_of_non_clean) { - MemRegion mr2(start_of_non_clean, end_of_non_clean); - _dirty_card_closure->do_MemRegion(mr2); - } - // Reset the dirty window while continuing to - // look for the next dirty window to process. - end_of_non_clean = cur; - start_of_non_clean = end_of_non_clean; - } - // Open the left end of the window one card to the left. - start_of_non_clean = cur; - // Note that "entry" leads "start_of_non_clean" in - // its leftward excursion after this point - // in the loop and, when we hit the left end of "mr", - // will point off of the left end of the card-table - // for "mr". - entry--; - } - // If the first card of "mr" was dirty, we will have - // been left with a dirty window, co-initial with "mr", - // which we now process. - if (start_of_non_clean < end_of_non_clean) { - MemRegion mr2(start_of_non_clean, end_of_non_clean); - _dirty_card_closure->do_MemRegion(mr2); - } + // If the first card of "mr" was dirty, we will have + // been left with a dirty window, co-initial with "mr", + // which we now process. + if (start_of_non_clean < end_of_non_clean) { + const MemRegion mrd(start_of_non_clean, end_of_non_clean); + _dirty_card_closure->do_MemRegion(mrd); } -}; +} + // clean (by dirty->clean before) ==> cur_younger_gen // dirty ==> cur_youngergen_and_prev_nonclean_card // precleaned ==> cur_youngergen_and_prev_nonclean_card @@ -246,8 +250,8 @@ cl->gen_boundary()); ClearNoncleanCardWrapper clear_cl(dcto_cl, this); - _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(), - dcto_cl, &clear_cl); + _ct_bs->non_clean_card_iterate_possibly_parallel(sp, sp->used_region_at_save_marks(), + dcto_cl, &clear_cl); } void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) { diff -r 49a67202bc67 -r c48ad6ab8bdf src/share/vm/memory/cardTableRS.hpp --- a/src/share/vm/memory/cardTableRS.hpp Tue Apr 19 15:46:59 2011 -0400 +++ b/src/share/vm/memory/cardTableRS.hpp Wed Apr 20 19:19:30 2011 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -166,4 +166,21 @@ }; +class ClearNoncleanCardWrapper: public MemRegionClosure { + MemRegionClosure* _dirty_card_closure; + CardTableRS* _ct; + bool _is_par; +private: + // Clears the given card, return true if the corresponding card should be + // processed. + inline bool clear_card(jbyte* entry); + // Work methods called by the clear_card() + inline bool clear_card_serial(jbyte* entry); + inline bool clear_card_parallel(jbyte* entry); + +public: + ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct); + void do_MemRegion(MemRegion mr); +}; + #endif // SHARE_VM_MEMORY_CARDTABLERS_HPP