# HG changeset patch
# User jcoomes
# Date 1369944291 25200
# Node ID 5534bd30c151c8bd4892d77d1ebc9a974104b265
# Parent  8dbc025ff709263121f59f26b9f2f02ca690b62d
6725714: par compact - add a table to speed up bitmap searches
Reviewed-by: jmasa, tschatzl

diff -r 8dbc025ff709 -r 5534bd30c151 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Mon May 27 12:58:42 2013 +0200
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Thu May 30 13:04:51 2013 -0700
@@ -116,7 +116,7 @@
 
   // The alignment used for eden and survivors within the young gen
   // and for boundary between young gen and old gen.
-  size_t intra_heap_alignment() const { return 64 * K; }
+  size_t intra_heap_alignment() const { return 64 * K * HeapWordSize; }
 
   size_t capacity() const;
   size_t used() const;
diff -r 8dbc025ff709 -r 5534bd30c151 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Mon May 27 12:58:42 2013 +0200
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Thu May 30 13:04:51 2013 -0700
@@ -59,13 +59,25 @@
 #include <math.h>
 
 // All sizes are in HeapWords.
-const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words
+const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words
 const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
 const size_t ParallelCompactData::RegionSizeBytes = RegionSize << LogHeapWordSize;
 const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
 const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
-const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
+const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
+
+const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words
+const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize;
+const size_t ParallelCompactData::BlockSizeBytes =
+  BlockSize << LogHeapWordSize;
+const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1;
+const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1;
+const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask;
+
+const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize;
+const size_t ParallelCompactData::Log2BlocksPerRegion =
+  Log2RegionSize - Log2BlockSize;
 
 const ParallelCompactData::RegionData::region_sz_t
 ParallelCompactData::RegionData::dc_shift = 27;
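
[Editor's sketch -- illustrative, not part of the changeset]
The geometry implied by the new constants; HeapWordSize == 8 (a 64-bit VM) is
an assumption of the sketch, and the names mirror the constants above.

    #include <cstddef>
    const std::size_t HeapWordSize    = 8;                      // assumed
    const std::size_t RegionSize      = (std::size_t)1 << 16;   // 65536 words = 512 KB
    const std::size_t BlockSize       = (std::size_t)1 << 7;    //   128 words =   1 KB
    const std::size_t BlocksPerRegion = RegionSize / BlockSize; //   512 blocks
    // One 2-byte BlockData per 128-word block costs 2 bytes per 1 KB of heap,
    // roughly 0.2% overhead, while a region grows from 512 words to 64K words.

This also appears to be the reason for the parallelScavengeHeap.hpp change:
intra_heap_alignment() is a byte count, and 64 * K * HeapWordSize bytes
(512 KB) keeps it equal to the byte size of the enlarged regions.
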
@@ -359,6 +371,10 @@
   _reserved_byte_size = 0;
   _region_data = 0;
   _region_count = 0;
+
+  _block_vspace = 0;
+  _block_data = 0;
+  _block_count = 0;
 }
 
 bool ParallelCompactData::initialize(MemRegion covered_region)
@@ -372,8 +388,7 @@
   assert((region_size & RegionSizeOffsetMask) == 0,
          "region size not a multiple of RegionSize");
 
-  bool result = initialize_region_data(region_size);
-
+  bool result = initialize_region_data(region_size) && initialize_block_data();
   return result;
 }
 
@@ -418,17 +433,36 @@
   return false;
 }
 
+bool ParallelCompactData::initialize_block_data()
+{
+  assert(_region_count != 0, "region data must be initialized first");
+  const size_t count = _region_count << Log2BlocksPerRegion;
+  _block_vspace = create_vspace(count, sizeof(BlockData));
+  if (_block_vspace != 0) {
+    _block_data = (BlockData*)_block_vspace->reserved_low_addr();
+    _block_count = count;
+    return true;
+  }
+  return false;
+}
+
 void ParallelCompactData::clear()
 {
   memset(_region_data, 0, _region_vspace->committed_size());
+  memset(_block_data, 0, _block_vspace->committed_size());
 }
 
 void ParallelCompactData::clear_range(size_t beg_region, size_t end_region)
 {
   assert(beg_region <= _region_count, "beg_region out of range");
   assert(end_region <= _region_count, "end_region out of range");
+  assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize");
 
   const size_t region_cnt = end_region - beg_region;
   memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
+
+  const size_t beg_block = beg_region * BlocksPerRegion;
+  const size_t block_cnt = region_cnt * BlocksPerRegion;
+  memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
 }
 
 HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
@@ -707,49 +741,48 @@
 
 HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
   assert(addr != NULL, "Should detect NULL oop earlier");
-  assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
-#ifdef ASSERT
-  if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
-    gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
-  }
-#endif
-  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
+  assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap");
+  assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked");
 
   // Region covering the object.
-  size_t region_index = addr_to_region_idx(addr);
-  const RegionData* const region_ptr = region(region_index);
-  HeapWord* const region_addr = region_align_down(addr);
-
-  assert(addr < region_addr + RegionSize, "Region does not cover object");
-  assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
-
+  RegionData* const region_ptr = addr_to_region_ptr(addr);
   HeapWord* result = region_ptr->destination();
 
-  // If all the data in the region is live, then the new location of the object
-  // can be calculated from the destination of the region plus the offset of the
-  // object in the region.
+  // If the entire Region is live, the new location is region->destination + the
+  // offset of the object within the Region.
+
+  // Run some performance tests to determine if this special case pays off.  It
+  // is worth it for pointers into the dense prefix.  If the optimization to
+  // avoid pointer updates in regions that only point to the dense prefix is
+  // ever implemented, this should be revisited.
  if (region_ptr->data_size() == RegionSize) {
-    result += pointer_delta(addr, region_addr);
-    DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
+    result += region_offset(addr);
     return result;
   }
 
-  // The new location of the object is
-  //    region destination +
-  //    size of the partial object extending onto the region +
-  //    sizes of the live objects in the Region that are to the left of addr
-  const size_t partial_obj_size = region_ptr->partial_obj_size();
-  HeapWord* const search_start = region_addr + partial_obj_size;
+  // Otherwise, the new location is region->destination + block offset + the
+  // number of live words in the Block that are (a) to the left of addr and (b)
+  // due to objects that start in the Block.
+
+  // Fill in the block table if necessary.  This is unsynchronized, so multiple
+  // threads may fill the block table for a region (harmless, since it is
+  // idempotent).
+  if (!region_ptr->blocks_filled()) {
+    PSParallelCompact::fill_blocks(addr_to_region_idx(addr));
+    region_ptr->set_blocks_filled();
+  }
+
+  HeapWord* const search_start = block_align_down(addr);
+  const size_t block_offset = addr_to_block_ptr(addr)->offset();
 
   const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
-  size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
-
-  result += partial_obj_size + live_to_left;
-  DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
+  const size_t live = bitmap->live_words_in_range(search_start, oop(addr));
+  result += block_offset + live;
+  DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result));
   return result;
 }
 
-#ifdef ASSERT
+#ifdef ASSERT
 void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
 {
   const size_t* const beg = (const size_t*)vspace->committed_low_addr();
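
[Editor's sketch -- illustrative, not part of the changeset]
A worked instance of the new lookup; every number below is invented.

    // Suppose addr is 300 words into its region.  With BlockSize = 128 words,
    // addr is in block 2, and search_start = block_align_down(addr) is word 256.
    //   region destination          D    (where the region's live data moves)
    //   block 2's stored offset     210  (live words in the region to the left
    //                                     of the first object starting in
    //                                     block 2, partial object included)
    //   live words in [256, addr)   7    (bitmap walk, counting only objects
    //                                     that start in block 2)
    //   new location = D + 210 + 7
    // The bitmap walk now spans at most one 128-word block; before this change
    // it started at the end of the partial object and could cross most of a
    // region.
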
@@ -762,16 +795,10 @@
 void ParallelCompactData::verify_clear()
 {
   verify_clear(_region_vspace);
+  verify_clear(_block_vspace);
 }
 #endif  // #ifdef ASSERT
 
-#ifdef NOT_PRODUCT
-ParallelCompactData::RegionData* debug_region(size_t region_index) {
-  ParallelCompactData& sd = PSParallelCompact::summary_data();
-  return sd.region(region_index);
-}
-#endif
-
 elapsedTimer PSParallelCompact::_accumulated_time;
 unsigned int PSParallelCompact::_total_invocations = 0;
 unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0;
@@ -1961,11 +1988,6 @@
                                       maximum_heap_compaction);
 }
 
-bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
-  size_t addr_region_index = addr_to_region_idx(addr);
-  return region_index == addr_region_index;
-}
-
 // This method contains no policy. You should probably
 // be calling invoke() instead.
 bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
@@ -2627,6 +2649,41 @@
   }
 }
 
+#ifdef ASSERT
+// Write a histogram of the number of times the block table was filled for a
+// region.
+void PSParallelCompact::write_block_fill_histogram(outputStream* const out)
+{
+  if (!TraceParallelOldGCCompactionPhase) return;
+
+  typedef ParallelCompactData::RegionData rd_t;
+  ParallelCompactData& sd = summary_data();
+
+  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
+    MutableSpace* const spc = _space_info[id].space();
+    if (spc->bottom() != spc->top()) {
+      const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom());
+      HeapWord* const top_aligned_up = sd.region_align_up(spc->top());
+      const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up);
+
+      size_t histo[5] = { 0, 0, 0, 0, 0 };
+      const size_t histo_len = sizeof(histo) / sizeof(size_t);
+      const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t));
+
+      for (const rd_t* cur = beg; cur < end; ++cur) {
+        ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)];
+      }
+      out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt);
+      for (size_t i = 0; i < histo_len; ++i) {
+        out->print(" " SIZE_FORMAT_W(5) " %5.1f%%",
+                   histo[i], 100.0 * histo[i] / region_cnt);
+      }
+      out->cr();
+    }
+  }
+}
+#endif // #ifdef ASSERT
+
 void PSParallelCompact::compact() {
   // trace("5");
   TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty);
@@ -2666,6 +2723,8 @@
       update_deferred_objects(cm, SpaceId(id));
     }
   }
+
+  DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty));
 }
 
 #ifdef ASSERT
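
[Editor's sketch -- illustrative, not part of the changeset]
The histogram above (debug builds, when TraceParallelOldGCCompactionPhase is
enabled) buckets regions by how many times their block table was filled: 0,
1, 2, 3, or 4+.  Counts above 1 come from the unsynchronized fill in
calc_new_pointer(); the pattern it relies on, reduced to essentials:

    // Lazy, idempotent, unsynchronized fill (toy code, not VM code).
    struct Table {
      bool filled;
      int  entry[4];
    };
    void ensure_filled(Table* t) {
      if (!t->filled) {              // racy check: several threads may pass
        for (int i = 0; i < 4; ++i) {
          t->entry[i] = i * i;       // idempotent: every thread stores the
        }                            // same value into the same slot
        t->filled = true;
      }
    }
    // Formally this is a data race; the VM tolerates it because every racing
    // thread writes identical values, so any interleaving yields the same table.
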
@@ -3130,6 +3189,57 @@
   } while (true);
 }
 
+void PSParallelCompact::fill_blocks(size_t region_idx)
+{
+  // Fill in the block table elements for the specified region.  Each block
+  // table element holds the number of live words in the region that are to the
+  // left of the first object that starts in the block.  Thus only blocks in
+  // which an object starts need to be filled.
+  //
+  // The algorithm scans the section of the bitmap that corresponds to the
+  // region, keeping a running total of the live words.  When an object start is
+  // found, if it's the first to start in the block that contains it, the
+  // current total is written to the block table element.
+  const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize;
+  const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize;
+  const size_t RegionSize = ParallelCompactData::RegionSize;
+
+  ParallelCompactData& sd = summary_data();
+  const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size();
+  if (partial_obj_size >= RegionSize) {
+    return; // No objects start in this region.
+  }
+
+  // Ensure the first loop iteration decides that the block has changed.
+  size_t cur_block = sd.block_count();
+
+  const ParMarkBitMap* const bitmap = mark_bitmap();
+
+  const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment;
+  assert((size_t)1 << Log2BitsPerBlock ==
+         bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity");
+
+  size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize);
+  const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize);
+  size_t live_bits = bitmap->words_to_bits(partial_obj_size);
+  beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end);
+  while (beg_bit < range_end) {
+    const size_t new_block = beg_bit >> Log2BitsPerBlock;
+    if (new_block != cur_block) {
+      cur_block = new_block;
+      sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
+    }
+
+    const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
+    if (end_bit < range_end - 1) {
+      live_bits += end_bit - beg_bit + 1;
+      beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end);
+    } else {
+      return;
+    }
+  }
+}
+
 void PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
   const MutableSpace* sp = space(space_id);
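
[Editor's sketch -- illustrative, not part of the changeset]
One trip through the scan above, with invented numbers.  The sketch assumes
LogMinObjAlignment == 0, so one bitmap bit is one word and Log2BitsPerBlock
is 7.

    // fill_blocks(3): the scan covers bits [3*65536, 4*65536).
    // The region has a 100-word partial object, so live_bits starts at 100
    // and the first find_obj_beg() searches from bit 3*65536 + 100.
    //
    // Object A starts at word 130 of the region, length 50:
    //   new_block = (3*65536 + 130) >> 7 = 3*512 + 1, i.e. block 1 of the
    //   region; it differs from cur_block, so block 1's offset is set to 100.
    //   find_obj_end() skips past A: live_bits = 100 + 50 = 150.
    //
    // Object B starts at word 300, length 10:
    //   300 >> 7 = 2, a new block; block 2's offset is set to 150.
    //
    // No further object starts, so the loop ends.  Blocks with no object
    // start (0, and 3 through 511 here) keep offset 0; calc_new_pointer()
    // is only ever called for object starts, so it never reads them.
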
diff -r 8dbc025ff709 -r 5534bd30c151 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Mon May 27 12:58:42 2013 +0200
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Thu May 30 13:04:51 2013 -0700
@@ -220,6 +220,17 @@
   // Mask for the bits in a pointer to get the address of the start of a region.
   static const size_t RegionAddrMask;
 
+  static const size_t Log2BlockSize;
+  static const size_t BlockSize;
+  static const size_t BlockSizeBytes;
+
+  static const size_t BlockSizeOffsetMask;
+  static const size_t BlockAddrOffsetMask;
+  static const size_t BlockAddrMask;
+
+  static const size_t BlocksPerRegion;
+  static const size_t Log2BlocksPerRegion;
+
   class RegionData
   {
   public:
@@ -272,6 +283,12 @@
     inline uint destination_count() const;
     inline uint destination_count_raw() const;
 
+    // Whether the block table for this region has been filled.
+    inline bool blocks_filled() const;
+
+    // Number of times the block table was filled.
+    DEBUG_ONLY(inline size_t blocks_filled_count() const;)
+
     // The location of the java heap data that corresponds to this region.
     inline HeapWord* data_location() const;
 
@@ -296,6 +313,7 @@
     void set_partial_obj_size(size_t words) {
       _partial_obj_size = (region_sz_t) words;
     }
+    inline void set_blocks_filled();
 
     inline void set_destination_count(uint count);
     inline void set_live_obj_size(size_t words);
@@ -328,7 +346,11 @@
     HeapWord*   _partial_obj_addr;
     region_sz_t _partial_obj_size;
     region_sz_t volatile _dc_and_los;
+    bool        _blocks_filled;
+
 #ifdef ASSERT
+    size_t      _blocks_filled_count;  // Number of block table fills.
+
     // These enable optimizations that are only partially implemented.  Use
     // debug builds to prevent the code fragments from breaking.
     HeapWord*   _data_location;
@@ -337,11 +359,26 @@
 
 #ifdef ASSERT
    public:
-    uint _pushed;  // 0 until region is pushed onto a worker's stack
+    uint _pushed;  // 0 until region is pushed onto a stack
    private:
 #endif
   };
 
+  // "Blocks" allow shorter sections of the bitmap to be searched.  Each Block
+  // holds an offset, which is the amount of live data in the Region to the left
+  // of the first live object that starts in the Block.
+  class BlockData
+  {
+  public:
+    typedef unsigned short int blk_ofs_t;
+
+    blk_ofs_t offset() const    { return _offset; }
+    void set_offset(size_t val) { _offset = (blk_ofs_t)val; }
+
+  private:
+    blk_ofs_t _offset;
+  };
+
  public:
   ParallelCompactData();
   bool initialize(MemRegion covered_region);
@@ -353,8 +390,9 @@
   inline RegionData* region(size_t region_idx) const;
   inline size_t region(const RegionData* const region_ptr) const;
 
-  // Returns true if the given address is contained within the region
-  bool region_contains(size_t region_index, HeapWord* addr);
+  size_t block_count() const { return _block_count; }
+  inline BlockData* block(size_t block_idx) const;
+  inline size_t block(const BlockData* block_ptr) const;
 
   void add_obj(HeapWord* addr, size_t len);
   void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }
@@ -394,11 +432,24 @@
   inline HeapWord* region_align_up(HeapWord* addr) const;
   inline bool is_region_aligned(HeapWord* addr) const;
 
+  // Analogous to region_offset() for blocks.
+  size_t block_offset(const HeapWord* addr) const;
+  size_t addr_to_block_idx(const HeapWord* addr) const;
+  size_t addr_to_block_idx(const oop obj) const {
+    return addr_to_block_idx((HeapWord*) obj);
+  }
+  inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
+  inline HeapWord* block_to_addr(size_t block) const;
+  inline size_t region_to_block_idx(size_t region) const;
+
+  inline HeapWord* block_align_down(HeapWord* addr) const;
+  inline HeapWord* block_align_up(HeapWord* addr) const;
+  inline bool is_block_aligned(HeapWord* addr) const;
+
   // Return the address one past the end of the partial object.
   HeapWord* partial_obj_end(size_t region_idx) const;
 
-  // Return the new location of the object p after the
-  // the compaction.
+  // Return the location of the object after compaction.
   HeapWord* calc_new_pointer(HeapWord* addr);
 
   HeapWord* calc_new_pointer(oop p) {
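
[Editor's note -- an inference; the changeset does not state this]
A 2-byte blk_ofs_t suffices precisely because Log2RegionSize is 16: a Block's
offset is a live-word count within one region, so it never exceeds
RegionSize - 1 = 65535.  A C++11 sketch of the invariant:

    #include <climits>
    #include <cstddef>
    typedef unsigned short int blk_ofs_t;                 // as in BlockData
    const std::size_t RegionSize = (std::size_t)1 << 16;  // words per region
    static_assert(RegionSize - 1 <= USHRT_MAX, "blk_ofs_t is wide enough");
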
@@ -411,6 +462,7 @@
 #endif  // #ifdef ASSERT
 
 private:
+  bool initialize_block_data();
   bool initialize_region_data(size_t region_size);
   PSVirtualSpace* create_vspace(size_t count, size_t element_size);
@@ -424,6 +476,10 @@
   size_t _reserved_byte_size;
   RegionData* _region_data;
   size_t _region_count;
+
+  PSVirtualSpace* _block_vspace;
+  BlockData* _block_data;
+  size_t _block_count;
 };
 
 inline uint
@@ -438,6 +494,28 @@
   return destination_count_raw() >> dc_shift;
 }
 
+inline bool
+ParallelCompactData::RegionData::blocks_filled() const
+{
+  return _blocks_filled;
+}
+
+#ifdef ASSERT
+inline size_t
+ParallelCompactData::RegionData::blocks_filled_count() const
+{
+  return _blocks_filled_count;
+}
+#endif // #ifdef ASSERT
+
+inline void
+ParallelCompactData::RegionData::set_blocks_filled()
+{
+  _blocks_filled = true;
+  // Debug builds count the number of times the table was filled.
+  DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
+}
+
 inline void
 ParallelCompactData::RegionData::set_destination_count(uint count)
 {
@@ -532,6 +610,12 @@
   return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
 }
 
+inline ParallelCompactData::BlockData*
+ParallelCompactData::block(size_t n) const {
+  assert(n < block_count(), "bad arg");
+  return _block_data + n;
+}
+
 inline size_t
 ParallelCompactData::region_offset(const HeapWord* addr) const
 {
@@ -598,6 +682,63 @@
   return region_offset(addr) == 0;
 }
 
+inline size_t
+ParallelCompactData::block_offset(const HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr <= _region_end, "bad addr");
+  return (size_t(addr) & BlockAddrOffsetMask) >> LogHeapWordSize;
+}
+
+inline size_t
+ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr <= _region_end, "bad addr");
+  return pointer_delta(addr, _region_start) >> Log2BlockSize;
+}
+
+inline ParallelCompactData::BlockData*
+ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
+{
+  return block(addr_to_block_idx(addr));
+}
+
+inline HeapWord*
+ParallelCompactData::block_to_addr(size_t block) const
+{
+  assert(block < _block_count, "block out of range");
+  return _region_start + (block << Log2BlockSize);
+}
+
+inline size_t
+ParallelCompactData::region_to_block_idx(size_t region) const
+{
+  return region << Log2BlocksPerRegion;
+}
+
+inline HeapWord*
+ParallelCompactData::block_align_down(HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr < _region_end + RegionSize, "bad addr");
+  return (HeapWord*)(size_t(addr) & BlockAddrMask);
+}
+
+inline HeapWord*
+ParallelCompactData::block_align_up(HeapWord* addr) const
+{
+  assert(addr >= _region_start, "bad addr");
+  assert(addr <= _region_end, "bad addr");
+  return block_align_down(addr + BlockSizeOffsetMask);
+}
+
+inline bool
+ParallelCompactData::is_block_aligned(HeapWord* addr) const
+{
+  return block_offset(addr) == 0;
+}
+
 // Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
 // do_addr() method.
 //
@@ -775,6 +916,7 @@
   // Convenient access to type names.
   typedef ParMarkBitMap::idx_t idx_t;
   typedef ParallelCompactData::RegionData RegionData;
+  typedef ParallelCompactData::BlockData BlockData;
 
   typedef enum {
     old_space_id, eden_space_id,
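
[Editor's sketch -- illustrative, not part of the changeset]
The address arithmetic above with concrete numbers.  It assumes 8-byte
HeapWords (LogHeapWordSize == 3); the addresses are invented, and the
computations mirror block_align_down(), block_offset() and
addr_to_block_idx().

    #include <stdio.h>
    #include <stdint.h>

    int main() {
      const uintptr_t log_heap_word_size     = 3;   // assumed: 64-bit VM
      const uintptr_t log2_block_size        = 7;   // 128 words
      const uintptr_t block_size_bytes       = 128u << log_heap_word_size; // 1 KB
      const uintptr_t block_addr_offset_mask = block_size_bytes - 1;
      const uintptr_t block_addr_mask        = ~block_addr_offset_mask;

      const uintptr_t region_start = 0x100000;      // invented, block aligned
      const uintptr_t addr = region_start + 2496;   // 312 words into the space

      const uintptr_t aligned   = addr & block_addr_mask;
      const uintptr_t off_words = (addr & block_addr_offset_mask)
                                  >> log_heap_word_size;
      const uintptr_t index     = (addr - region_start)
                                  >> (log2_block_size + log_heap_word_size);

      // Prints: aligned=0x100800 offset=56 words index=2
      printf("aligned=%#lx offset=%lu words index=%lu\n",
             (unsigned long)aligned, (unsigned long)off_words,
             (unsigned long)index);
      return 0;
    }
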
@@ -962,6 +1104,8 @@
   // Adjust addresses in roots.  Does not adjust addresses in heap.
   static void adjust_roots();
 
+  DEBUG_ONLY(static void write_block_fill_histogram(outputStream* const out);)
+
   // Move objects to new locations.
   static void compact_perm(ParCompactionManager* cm);
   static void compact();
@@ -1128,6 +1272,9 @@
     fill_region(cm, region);
   }
 
+  // Fill in the block table for the specified region.
+  static void fill_blocks(size_t region_idx);
+
   // Update the deferred objects in the space.
   static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);
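
[Editor's sketch -- illustrative, not part of the changeset]
The whole mechanism end to end, in miniature and without VM types: fill() is
the analogue of fill_blocks(), new_location() the analogue of the block-table
path in calc_new_pointer().  Sizes and the object layout are invented, and the
real code walks the mark bitmap object by object rather than word by word.

    #include <assert.h>
    #include <stdio.h>

    const int kRegionWords = 16;               // toy stand-in for 64K words
    const int kBlockWords  = 4;                // toy stand-in for 128 words
    const int kBlocks      = kRegionWords / kBlockWords;

    bool live[kRegionWords];                   // stand-in for the mark bitmap
    bool obj_start[kRegionWords];
    int  block_offset[kBlocks];                // the block table

    // For each block in which an object starts, record the count of live
    // words to the left of that first object.
    void fill() {
      bool filled[kBlocks] = { false };
      int live_words = 0;
      for (int w = 0; w < kRegionWords; ++w) {
        const int b = w / kBlockWords;
        if (obj_start[w] && !filled[b]) {
          block_offset[b] = live_words;
          filled[b] = true;
        }
        live_words += live[w] ? 1 : 0;
      }
    }

    // destination + block offset + live words from the block start to w.
    int new_location(int destination, int w) {
      assert(obj_start[w]);
      const int block_start = w - w % kBlockWords;   // block_align_down
      int in_block = 0;
      for (int i = block_start; i < w; ++i) {        // short bitmap walk
        in_block += live[i] ? 1 : 0;
      }
      return destination + block_offset[w / kBlockWords] + in_block;
    }

    int main() {
      // Two live objects: words 5-6 and words 12-13.
      live[5] = live[6] = live[12] = live[13] = true;
      obj_start[5] = obj_start[12] = true;
      fill();
      // With destination 100: the object at word 5 moves to 100 (block 1 has
      // offset 0 and nothing live before it in the block); the object at
      // word 12 moves to 102 (block 3's offset is the first object's 2 words).
      printf("obj@5 -> %d, obj@12 -> %d\n",
             new_location(100, 5), new_location(100, 12));
      return 0;
    }
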