comparison src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @ 10369:5534bd30c151
6725714: par compact - add a table to speed up bitmap searches
Reviewed-by: jmasa, tschatzl
author | jcoomes |
date | Thu, 30 May 2013 13:04:51 -0700 |
parents | eda078b01c65 |
children | f2110083203d |
10363:8dbc025ff709 (comparison base) | 10369:5534bd30c151 (this changeset) |
---|---|
57 #include "utilities/stack.inline.hpp" | 57 #include "utilities/stack.inline.hpp" |
58 | 58 |
59 #include <math.h> | 59 #include <math.h> |
60 | 60 |
61 // All sizes are in HeapWords. | 61 // All sizes are in HeapWords. |
62 const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words | 62 const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words |
63 const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize; | 63 const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize; |
64 const size_t ParallelCompactData::RegionSizeBytes = | 64 const size_t ParallelCompactData::RegionSizeBytes = |
65 RegionSize << LogHeapWordSize; | 65 RegionSize << LogHeapWordSize; |
66 const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1; | 66 const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1; |
67 const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1; | 67 const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1; |
68 const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; | 68 const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask; |
69 | |
70 const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words | |
71 const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize; | |
72 const size_t ParallelCompactData::BlockSizeBytes = | |
73 BlockSize << LogHeapWordSize; | |
74 const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1; | |
75 const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1; | |
76 const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask; | |
77 | |
78 const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize; | |
79 const size_t ParallelCompactData::Log2BlocksPerRegion = | |
80 Log2RegionSize - Log2BlockSize; | |
69 | 81 |
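
As a point of reference for the numbers above: a standalone sketch of how the region and block sizes relate. The names mirror the constants in the diff, but this is not HotSpot code, and `LogHeapWordSize = 3` (8-byte HeapWords on a 64-bit VM) is an assumption.

```cpp
#include <cstddef>
#include <cstdio>

// Mirrors of the constants in the changeset; LogHeapWordSize = 3 is an
// assumption (8-byte HeapWords on LP64).
const size_t LogHeapWordSize = 3;
const size_t Log2RegionSize  = 16;                             // 64K words
const size_t RegionSize      = (size_t)1 << Log2RegionSize;
const size_t Log2BlockSize   = 7;                              // 128 words
const size_t BlockSize       = (size_t)1 << Log2BlockSize;

const size_t BlocksPerRegion     = RegionSize / BlockSize;           // 512
const size_t Log2BlocksPerRegion = Log2RegionSize - Log2BlockSize;   // 9

static_assert(BlocksPerRegion == (size_t)1 << Log2BlocksPerRegion,
              "division and shift must agree");

int main() {
  printf("region: %zu words (%zu bytes), block: %zu words, %zu blocks/region\n",
         RegionSize, RegionSize << LogHeapWordSize, BlockSize, BlocksPerRegion);
  return 0;
}
```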
70 const ParallelCompactData::RegionData::region_sz_t | 82 const ParallelCompactData::RegionData::region_sz_t |
71 ParallelCompactData::RegionData::dc_shift = 27; | 83 ParallelCompactData::RegionData::dc_shift = 27; |
72 | 84 |
73 const ParallelCompactData::RegionData::region_sz_t | 85 const ParallelCompactData::RegionData::region_sz_t |
357 | 369 |
358 _region_vspace = 0; | 370 _region_vspace = 0; |
359 _reserved_byte_size = 0; | 371 _reserved_byte_size = 0; |
360 _region_data = 0; | 372 _region_data = 0; |
361 _region_count = 0; | 373 _region_count = 0; |
374 | |
375 _block_vspace = 0; | |
376 _block_data = 0; | |
377 _block_count = 0; | |
362 } | 378 } |
363 | 379 |
364 bool ParallelCompactData::initialize(MemRegion covered_region) | 380 bool ParallelCompactData::initialize(MemRegion covered_region) |
365 { | 381 { |
366 _region_start = covered_region.start(); | 382 _region_start = covered_region.start(); |
370 assert(region_align_down(_region_start) == _region_start, | 386 assert(region_align_down(_region_start) == _region_start, |
371 "region start not aligned"); | 387 "region start not aligned"); |
372 assert((region_size & RegionSizeOffsetMask) == 0, | 388 assert((region_size & RegionSizeOffsetMask) == 0, |
373 "region size not a multiple of RegionSize"); | 389 "region size not a multiple of RegionSize"); |
374 | 390 |
375 bool result = initialize_region_data(region_size); | 391 bool result = initialize_region_data(region_size) && initialize_block_data(); |
376 | |
377 return result; | 392 return result; |
378 } | 393 } |
379 | 394 |
380 PSVirtualSpace* | 395 PSVirtualSpace* |
381 ParallelCompactData::create_vspace(size_t count, size_t element_size) | 396 ParallelCompactData::create_vspace(size_t count, size_t element_size) |
416 return true; | 431 return true; |
417 } | 432 } |
418 return false; | 433 return false; |
419 } | 434 } |
420 | 435 |
436 bool ParallelCompactData::initialize_block_data() | |
437 { | |
438 assert(_region_count != 0, "region data must be initialized first"); | |
439 const size_t count = _region_count << Log2BlocksPerRegion; | |
440 _block_vspace = create_vspace(count, sizeof(BlockData)); | |
441 if (_block_vspace != 0) { | |
442 _block_data = (BlockData*)_block_vspace->reserved_low_addr(); | |
443 _block_count = count; | |
444 return true; | |
445 } | |
446 return false; | |
447 } | |
448 | |
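
For a rough sense of what initialize_block_data reserves: the block count is the region count scaled by 512 (`_region_count << Log2BlocksPerRegion`). The 4-byte BlockData entry below is hypothetical; the real layout is declared in psParallelCompact.hpp.

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  // 4 GB heap in 8-byte words; all figures here are illustrative only.
  const size_t heap_words          = ((size_t)4 << 30) / 8;
  const size_t RegionSize          = (size_t)1 << 16;  // 64K words
  const size_t Log2BlocksPerRegion = 9;                // 512 blocks/region
  const size_t block_data_size     = 4;                // hypothetical sizeof(BlockData)

  const size_t region_count = heap_words / RegionSize;             // 8192
  const size_t block_count  = region_count << Log2BlocksPerRegion; // ~4.2M
  printf("regions: %zu, blocks: %zu, block table: %zu MB\n",
         region_count, block_count,
         block_count * block_data_size / (1024 * 1024));
  return 0;
}
```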
421 void ParallelCompactData::clear() | 449 void ParallelCompactData::clear() |
422 { | 450 { |
423 memset(_region_data, 0, _region_vspace->committed_size()); | 451 memset(_region_data, 0, _region_vspace->committed_size()); |
452 memset(_block_data, 0, _block_vspace->committed_size()); | |
424 } | 453 } |
425 | 454 |
426 void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) { | 455 void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) { |
427 assert(beg_region <= _region_count, "beg_region out of range"); | 456 assert(beg_region <= _region_count, "beg_region out of range"); |
428 assert(end_region <= _region_count, "end_region out of range"); | 457 assert(end_region <= _region_count, "end_region out of range"); |
458 assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize"); | |
429 | 459 |
430 const size_t region_cnt = end_region - beg_region; | 460 const size_t region_cnt = end_region - beg_region; |
431 memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData)); | 461 memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData)); |
462 | |
463 const size_t beg_block = beg_region * BlocksPerRegion; | |
464 const size_t block_cnt = region_cnt * BlocksPerRegion; | |
465 memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData)); | |
432 } | 466 } |
433 | 467 |
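
The block range cleared above is simply the region range scaled by BlocksPerRegion; a minimal sketch with the sizes from this change:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  const size_t BlocksPerRegion = 512;
  const size_t beg_region = 2, end_region = 5;   // example range
  const size_t beg_block = beg_region * BlocksPerRegion;
  const size_t block_cnt = (end_region - beg_region) * BlocksPerRegion;
  // Clearing regions [2, 5) therefore clears blocks [1024, 2560).
  printf("blocks [%zu, %zu)\n", beg_block, beg_block + block_cnt);
  return 0;
}
```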
434 HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const | 468 HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const |
435 { | 469 { |
436 const RegionData* cur_cp = region(region_idx); | 470 const RegionData* cur_cp = region(region_idx); |
705 return true; | 739 return true; |
706 } | 740 } |
707 | 741 |
708 HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) { | 742 HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) { |
709 assert(addr != NULL, "Should detect NULL oop earlier"); | 743 assert(addr != NULL, "Should detect NULL oop earlier"); |
710 assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap"); | 744 assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap"); |
745 assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked"); | |
746 | |
747 // Region covering the object. | |
748 RegionData* const region_ptr = addr_to_region_ptr(addr); | |
749 HeapWord* result = region_ptr->destination(); | |
750 | |
751 // If the entire Region is live, the new location is region->destination + the | |
752 // offset of the object within in the Region. | |
753 | |
754 // Run some performance tests to determine if this special case pays off. It | |
755 // is worth it for pointers into the dense prefix. If the optimization to | |
756 // avoid pointer updates in regions that only point to the dense prefix is | |
757 // ever implemented, this should be revisited. | |
758 if (region_ptr->data_size() == RegionSize) { | |
759 result += region_offset(addr); | |
760 return result; | |
761 } | |
762 | |
763 // Otherwise, the new location is region->destination + block offset + the | |
764 // number of live words in the Block that are (a) to the left of addr and (b) | |
765 // due to objects that start in the Block. | |
766 | |
767 // Fill in the block table if necessary. This is unsynchronized, so multiple | |
768 // threads may fill the block table for a region (harmless, since it is | |
769 // idempotent). | |
770 if (!region_ptr->blocks_filled()) { | |
771 PSParallelCompact::fill_blocks(addr_to_region_idx(addr)); | |
772 region_ptr->set_blocks_filled(); | |
773 } | |
774 | |
775 HeapWord* const search_start = block_align_down(addr); | |
776 const size_t block_offset = addr_to_block_ptr(addr)->offset(); | |
777 | |
778 const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); | |
779 const size_t live = bitmap->live_words_in_range(search_start, oop(addr)); | |
780 result += block_offset + live; | |
781 DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result)); | |
782 return result; | |
783 } | |
784 | |
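
This is where the new table pays off: the bitmap search now starts at block_align_down(addr) and is bounded by one 128-word block, where the old code (below, removed) searched from the start of what is now a 64K-word region. A toy model of the lookup, with plain arrays standing in for the ParMarkBitMap and block table, and all values invented:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// One flag per heap word marks live words (a stand-in for the real
// ParMarkBitMap). Everything is a word offset within a single region
// whose destination is taken to be 0.
const size_t RegionSize = 1 << 16;   // 64K words
const size_t BlockSize  = 1 << 7;    // 128 words

int main() {
  std::vector<char> live(RegionSize, 0);
  // Two live objects: words [100, 150) and [300, 340).
  for (size_t i = 100; i < 150; ++i) live[i] = 1;
  for (size_t i = 300; i < 340; ++i) live[i] = 1;

  // Block table: live words to the left of each block's first word.
  // The real fill_blocks() fills only blocks in which an object starts,
  // and measures from the first object start in the block; filling
  // every block is a harmless simplification that gives the same
  // lookup result for this layout.
  std::vector<size_t> block_offset(RegionSize / BlockSize, 0);
  size_t live_so_far = 0;
  for (size_t b = 0; b < block_offset.size(); ++b) {
    block_offset[b] = live_so_far;
    for (size_t i = b * BlockSize; i < (b + 1) * BlockSize; ++i)
      live_so_far += live[i];
  }

  // New location of the object at word 300: block table entry plus the
  // live words between the block start and the object. (The real
  // live_words_in_range() counts only words of objects that *start* in
  // the block; no object spills into this block, so a plain sum works.)
  const size_t addr  = 300;
  const size_t block = addr / BlockSize;   // block 2, starts at word 256
  size_t left_in_block = 0;
  for (size_t i = block * BlockSize; i < addr; ++i) left_in_block += live[i];

  // Expect 50: only the 50 live words of [100, 150) precede word 300.
  printf("new offset: %zu\n", block_offset[block] + left_in_block);
  return 0;
}
```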
711 #ifdef ASSERT | 785 #ifdef ASSERT |
712 if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) { | |
713 gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr); | |
714 } | |
715 #endif | |
716 assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked"); | |
717 | |
718 // Region covering the object. | |
719 size_t region_index = addr_to_region_idx(addr); | |
720 const RegionData* const region_ptr = region(region_index); | |
721 HeapWord* const region_addr = region_align_down(addr); | |
722 | |
723 assert(addr < region_addr + RegionSize, "Region does not cover object"); | |
724 assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check"); | |
725 | |
726 HeapWord* result = region_ptr->destination(); | |
727 | |
728 // If all the data in the region is live, then the new location of the object | |
729 // can be calculated from the destination of the region plus the offset of the | |
730 // object in the region. | |
731 if (region_ptr->data_size() == RegionSize) { | |
732 result += pointer_delta(addr, region_addr); | |
733 DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);) | |
734 return result; | |
735 } | |
736 | |
737 // The new location of the object is | |
738 // region destination + | |
739 // size of the partial object extending onto the region + | |
740 // sizes of the live objects in the Region that are to the left of addr | |
741 const size_t partial_obj_size = region_ptr->partial_obj_size(); | |
742 HeapWord* const search_start = region_addr + partial_obj_size; | |
743 | |
744 const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap(); | |
745 size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr)); | |
746 | |
747 result += partial_obj_size + live_to_left; | |
748 DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);) | |
749 return result; | |
750 } | |
751 | |
752 #ifdef ASSERT | |
753 void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace) | 786 void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace) |
754 { | 787 { |
755 const size_t* const beg = (const size_t*)vspace->committed_low_addr(); | 788 const size_t* const beg = (const size_t*)vspace->committed_low_addr(); |
756 const size_t* const end = (const size_t*)vspace->committed_high_addr(); | 789 const size_t* const end = (const size_t*)vspace->committed_high_addr(); |
757 for (const size_t* p = beg; p < end; ++p) { | 790 for (const size_t* p = beg; p < end; ++p) { |
760 } | 793 } |
761 | 794 |
762 void ParallelCompactData::verify_clear() | 795 void ParallelCompactData::verify_clear() |
763 { | 796 { |
764 verify_clear(_region_vspace); | 797 verify_clear(_region_vspace); |
798 verify_clear(_block_vspace); | |
765 } | 799 } |
766 #endif // #ifdef ASSERT | 800 #endif // #ifdef ASSERT |
767 | |
768 #ifdef NOT_PRODUCT | |
769 ParallelCompactData::RegionData* debug_region(size_t region_index) { | |
770 ParallelCompactData& sd = PSParallelCompact::summary_data(); | |
771 return sd.region(region_index); | |
772 } | |
773 #endif | |
774 | 801 |
775 elapsedTimer PSParallelCompact::_accumulated_time; | 802 elapsedTimer PSParallelCompact::_accumulated_time; |
776 unsigned int PSParallelCompact::_total_invocations = 0; | 803 unsigned int PSParallelCompact::_total_invocations = 0; |
777 unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0; | 804 unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0; |
778 jlong PSParallelCompact::_time_of_last_gc = 0; | 805 jlong PSParallelCompact::_time_of_last_gc = 0; |
1959 | 1986 |
1960 PSParallelCompact::invoke_no_policy(clear_all_soft_refs || | 1987 PSParallelCompact::invoke_no_policy(clear_all_soft_refs || |
1961 maximum_heap_compaction); | 1988 maximum_heap_compaction); |
1962 } | 1989 } |
1963 | 1990 |
1964 bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) { | |
1965 size_t addr_region_index = addr_to_region_idx(addr); | |
1966 return region_index == addr_region_index; | |
1967 } | |
1968 | |
1969 // This method contains no policy. You should probably | 1991 // This method contains no policy. You should probably |
1970 // be calling invoke() instead. | 1992 // be calling invoke() instead. |
1971 bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { | 1993 bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) { |
1972 assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); | 1994 assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); |
1973 assert(ref_processor() != NULL, "Sanity"); | 1995 assert(ref_processor() != NULL, "Sanity"); |
2625 q->enqueue(new StealRegionCompactionTask(terminator_ptr)); | 2647 q->enqueue(new StealRegionCompactionTask(terminator_ptr)); |
2626 } | 2648 } |
2627 } | 2649 } |
2628 } | 2650 } |
2629 | 2651 |
2652 #ifdef ASSERT | |
2653 // Write a histogram of the number of times the block table was filled for a | |
2654 // region. | |
2655 void PSParallelCompact::write_block_fill_histogram(outputStream* const out) | |
2656 { | |
2657 if (!TraceParallelOldGCCompactionPhase) return; | |
2658 | |
2659 typedef ParallelCompactData::RegionData rd_t; | |
2660 ParallelCompactData& sd = summary_data(); | |
2661 | |
2662 for (unsigned int id = old_space_id; id < last_space_id; ++id) { | |
2663 MutableSpace* const spc = _space_info[id].space(); | |
2664 if (spc->bottom() != spc->top()) { | |
2665 const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom()); | |
2666 HeapWord* const top_aligned_up = sd.region_align_up(spc->top()); | |
2667 const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up); | |
2668 | |
2669 size_t histo[5] = { 0, 0, 0, 0, 0 }; | |
2670 const size_t histo_len = sizeof(histo) / sizeof(size_t); | |
2671 const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t)); | |
2672 | |
2673 for (const rd_t* cur = beg; cur < end; ++cur) { | |
2674 ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)]; | |
2675 } | |
2676 out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt); | |
2677 for (size_t i = 0; i < histo_len; ++i) { | |
2678 out->print(" " SIZE_FORMAT_W(5) " %5.1f%%", | |
2679 histo[i], 100.0 * histo[i] / region_cnt); | |
2680 } | |
2681 out->cr(); | |
2682 } | |
2683 } | |
2684 } | |
2685 #endif // #ifdef ASSERT | |
2686 | |
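
For reference, each row printed above contains the space id and name, the region count, and then a count and percentage for each bucket (regions whose block table was filled 0, 1, 2, 3, or 4+ times). A made-up example row:

```
0 old   8192  7800  95.2%   300   3.7%    80   1.0%    10   0.1%     2   0.0%
```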
2630 void PSParallelCompact::compact() { | 2687 void PSParallelCompact::compact() { |
2631 // trace("5"); | 2688 // trace("5"); |
2632 TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty); | 2689 TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty); |
2633 | 2690 |
2634 ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); | 2691 ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); |
2664 ParCompactionManager* cm = ParCompactionManager::manager_array(0); | 2721 ParCompactionManager* cm = ParCompactionManager::manager_array(0); |
2665 for (unsigned int id = old_space_id; id < last_space_id; ++id) { | 2722 for (unsigned int id = old_space_id; id < last_space_id; ++id) { |
2666 update_deferred_objects(cm, SpaceId(id)); | 2723 update_deferred_objects(cm, SpaceId(id)); |
2667 } | 2724 } |
2668 } | 2725 } |
2726 | |
2727 DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty)); | |
2669 } | 2728 } |
2670 | 2729 |
2671 #ifdef ASSERT | 2730 #ifdef ASSERT |
2672 void PSParallelCompact::verify_complete(SpaceId space_id) { | 2731 void PSParallelCompact::verify_complete(SpaceId space_id) { |
2673 // All Regions between space bottom() to new_top() should be marked as filled | 2732 // All Regions between space bottom() to new_top() should be marked as filled |
3128 src_region_idx = next_src_region(closure, src_space_id, src_space_top, | 3187 src_region_idx = next_src_region(closure, src_space_id, src_space_top, |
3129 end_addr); | 3188 end_addr); |
3130 } while (true); | 3189 } while (true); |
3131 } | 3190 } |
3132 | 3191 |
3192 void PSParallelCompact::fill_blocks(size_t region_idx) | |
3193 { | |
3194 // Fill in the block table elements for the specified region. Each block | |
3195 // table element holds the number of live words in the region that are to the | |
3196 // left of the first object that starts in the block. Thus only blocks in | |
3197 // which an object starts need to be filled. | |
3198 // | |
3199 // The algorithm scans the section of the bitmap that corresponds to the | |
3200 // region, keeping a running total of the live words. When an object start is | |
3201 // found, if it's the first to start in the block that contains it, the | |
3202 // current total is written to the block table element. | |
3203 const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize; | |
3204 const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize; | |
3205 const size_t RegionSize = ParallelCompactData::RegionSize; | |
3206 | |
3207 ParallelCompactData& sd = summary_data(); | |
3208 const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size(); | |
3209 if (partial_obj_size >= RegionSize) { | |
3210 return; // No objects start in this region. | |
3211 } | |
3212 | |
3213 // Ensure the first loop iteration decides that the block has changed. | |
3214 size_t cur_block = sd.block_count(); | |
3215 | |
3216 const ParMarkBitMap* const bitmap = mark_bitmap(); | |
3217 | |
3218 const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment; | |
3219 assert((size_t)1 << Log2BitsPerBlock == | |
3220 bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity"); | |
3221 | |
3222 size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize); | |
3223 const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize); | |
3224 size_t live_bits = bitmap->words_to_bits(partial_obj_size); | |
3225 beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end); | |
3226 while (beg_bit < range_end) { | |
3227 const size_t new_block = beg_bit >> Log2BitsPerBlock; | |
3228 if (new_block != cur_block) { | |
3229 cur_block = new_block; | |
3230 sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); | |
3231 } | |
3232 | |
3233 const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end); | |
3234 if (end_bit < range_end - 1) { | |
3235 live_bits += end_bit - beg_bit + 1; | |
3236 beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end); | |
3237 } else { | |
3238 return; | |
3239 } | |
3240 } | |
3241 } | |
3242 | |
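
The index arithmetic in fill_blocks, in isolation: assuming one mark-bitmap bit per heap word (LogMinObjAlignment == 0, the common 64-bit default), words_to_bits is the identity and a bit index maps to a global block index by a single shift. A minimal sketch:

```cpp
#include <cstddef>
#include <cstdio>

// Assumes one mark-bitmap bit per heap word (LogMinObjAlignment == 0),
// so Log2BitsPerBlock == Log2BlockSize - 0 and words == bits.
const size_t Log2RegionSize   = 16;   // 64K words per region
const size_t Log2BlockSize    = 7;    // 128 words per block
const size_t Log2BitsPerBlock = Log2BlockSize - 0;

int main() {
  const size_t region_idx = 3;
  // First bitmap bit covered by region 3 (words == bits here).
  const size_t beg_bit = region_idx << Log2RegionSize;   // 196608
  // Its global block index: three regions' worth of blocks precede it.
  const size_t block   = beg_bit >> Log2BitsPerBlock;    // 1536
  printf("region %zu: first bit %zu, first block %zu (= %zu * 512)\n",
         region_idx, beg_bit, block, region_idx);
  return 0;
}
```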
3133 void | 3243 void |
3134 PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) { | 3244 PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) { |
3135 const MutableSpace* sp = space(space_id); | 3245 const MutableSpace* sp = space(space_id); |
3136 if (sp->is_empty()) { | 3246 if (sp->is_empty()) { |
3137 return; | 3247 return; |