comparison src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @ 10246:194f52aa2f23

7176479: G1: JVM crashes on T5-8 system with 1.5 TB heap
Summary: Refactor G1's hot card cache and card counts table into their own files. Simplify the card counts table, including removing the encoding of the card index in each entry. The card counts table now has a 1:1 correspondence with the cards spanned by the heap. Space for the card counts table is reserved from virtual memory (rather than C heap) during JVM startup and is committed/expanded when the heap is expanded. Changes were also reviewed by Vitaly Davidovich.
Reviewed-by: tschatzl, jmasa
author johnc
date Thu, 09 May 2013 11:16:39 -0700
parents b0d20fa374b4
children 48391ab0687e
comparing 10245:923ac8d1df95 with 10246:194f52aa2f23
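
A minimal standalone sketch of the card counts layout described in the summary above: one count per card, indexed directly from the card's offset within the heap, with the table's backing store reserved from virtual memory up front and committed in step with the heap. This is an illustration, not the HotSpot sources; the name CardCountsSketch, the methods expand_to() and add_card_count(), the 512-byte card size constant and the use of POSIX mmap/mprotect are assumptions made for the example.

#include <sys/mman.h>   // mmap, mprotect (POSIX; assumed for the sketch)
#include <unistd.h>     // sysconf
#include <cassert>
#include <cstddef>
#include <cstdint>

class CardCountsSketch {
  static const size_t kCardShift = 9;   // 512-byte cards, one count byte per card

  uint8_t*       _counts;               // flat table, entry i belongs to card i
  const uint8_t* _heap_base;            // bottom of the reserved heap
  size_t         _reserved_cards;       // covers the maximum heap size
  size_t         _committed_cards;      // covers the committed part of the heap

  static size_t align_up(size_t n, size_t a) { return (n + a - 1) & ~(a - 1); }
  static size_t page_size() { return (size_t) sysconf(_SC_PAGESIZE); }

 public:
  CardCountsSketch(const void* heap_base, size_t max_heap_bytes)
      : _heap_base((const uint8_t*) heap_base),
        _reserved_cards(max_heap_bytes >> kCardShift),
        _committed_cards(0) {
    // Reserve address space only (PROT_NONE); nothing is committed yet.
    void* res = mmap(NULL, _reserved_cards, PROT_NONE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    assert(res != MAP_FAILED && "reserving the card counts table failed");
    _counts = (uint8_t*) res;
  }

  // Called when the heap is expanded: commit the slice of the table that
  // covers the newly committed heap range (one byte per card).
  void expand_to(size_t committed_heap_bytes) {
    size_t new_cards = committed_heap_bytes >> kCardShift;
    size_t old_bytes = align_up(_committed_cards, page_size());
    size_t new_bytes = align_up(new_cards, page_size());
    if (new_bytes > old_bytes) {
      int rc = mprotect(_counts + old_bytes, new_bytes - old_bytes,
                        PROT_READ | PROT_WRITE);
      assert(rc == 0 && "committing the card counts table failed");
    }
    _committed_cards = new_cards;
  }

  // 1:1 mapping from card to counter: the index is just the card's offset
  // from the heap base, so no card index needs to be encoded in the entry.
  uint8_t add_card_count(const void* card_start) {
    size_t index = ((const uint8_t*) card_start - _heap_base) >> kCardShift;
    assert(index < _committed_cards && "card outside the committed heap");
    if (_counts[index] < 255) {
      _counts[index]++;
    }
    return _counts[index];
  }
};

The hunks below apply the same pattern inside G1CollectedHeap: resize_card_counts(capacity()) when the committed heap grows, and reset_card_counts() / reset_card_counts(hr) when the counts become stale after a full GC or when a region is freed.
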
@@ -94,11 +94,11 @@
                               G1RemSet* g1rs,
                               ConcurrentG1Refine* cg1r) :
     _sts(sts), _g1rs(g1rs), _cg1r(cg1r), _concurrent(true)
   {}
   bool do_card_ptr(jbyte* card_ptr, int worker_i) {
-    bool oops_into_cset = _g1rs->concurrentRefineOneCard(card_ptr, worker_i, false);
+    bool oops_into_cset = _g1rs->refine_card(card_ptr, worker_i, false);
     // This path is executed by the concurrent refine or mutator threads,
     // concurrently, and so we do not care if card_ptr contains references
     // that point into the collection set.
     assert(!oops_into_cset, "should be");
 
@@ -1450,13 +1450,14 @@
 
     print_hrs_post_compaction();
     _hr_printer.end_gc(true /* full */, (size_t) total_collections());
   }
 
-  if (_cg1r->use_cache()) {
-    _cg1r->clear_and_record_card_counts();
-    _cg1r->clear_hot_cache();
+  G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache();
+  if (hot_card_cache->use_cache()) {
+    hot_card_cache->reset_card_counts();
+    hot_card_cache->reset_hot_cache();
   }
 
   // Rebuild remembered sets of all regions.
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     uint n_workers =
@@ -1765,10 +1766,12 @@
   _g1_committed.set_end(new_end);
   // Tell the card table about the update.
   Universe::heap()->barrier_set()->resize_covered_region(_g1_committed);
   // Tell the BOT about the update.
   _bot_shared->resize(_g1_committed.word_size());
+  // Tell the hot card cache about the update
+  _cg1r->hot_card_cache()->resize_card_counts(capacity());
 }
 
 bool G1CollectedHeap::expand(size_t expand_bytes) {
   size_t old_mem_size = _g1_storage.committed_size();
   size_t aligned_expand_bytes = ReservedSpace::page_align_size_up(expand_bytes);
@@ -1997,11 +2000,11 @@
 
   // Ensure that the sizes are properly aligned.
   Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
   Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
 
-  _cg1r = new ConcurrentG1Refine();
+  _cg1r = new ConcurrentG1Refine(this);
 
   // Reserve the maximum.
 
   // When compressed oops are enabled, the preferred heap base
   // is calculated by subtracting the requested size from the
@@ -2058,10 +2061,13 @@
   _g1_committed = MemRegion((HeapWord*)_g1_storage.low(), (size_t) 0);
   _hrs.initialize((HeapWord*) _g1_reserved.start(),
                   (HeapWord*) _g1_reserved.end(),
                   _expansion_regions);
 
+  // Do later initialization work for concurrent refinement.
+  _cg1r->init();
+
   // 6843694 - ensure that the maximum region index can fit
   // in the remembered set structures.
   const uint max_region_idx = (1U << (sizeof(RegionIdx_t)*BitsPerByte-1)) - 1;
   guarantee((max_regions() - 1) <= max_region_idx, "too many regions");
 
@@ -2075,24 +2081,24 @@
   _bot_shared = new G1BlockOffsetSharedArray(_reserved,
                                              heap_word_size(init_byte_size));
 
   _g1h = this;
 
   _in_cset_fast_test_length = max_regions();
   _in_cset_fast_test_base =
                    NEW_C_HEAP_ARRAY(bool, (size_t) _in_cset_fast_test_length, mtGC);
 
   // We're biasing _in_cset_fast_test to avoid subtracting the
   // beginning of the heap every time we want to index; basically
   // it's the same with what we do with the card table.
   _in_cset_fast_test = _in_cset_fast_test_base -
                ((uintx) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
 
   // Clear the _cset_fast_test bitmap in anticipation of adding
   // regions to the incremental collection set for the first
   // evacuation pause.
   clear_cset_fast_test();
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
   _cm = new ConcurrentMark(this, heap_rs);
   if (_cm == NULL || !_cm->completed_initialization()) {
@@ -2149,13 +2155,10 @@
                                                &JavaThread::dirty_card_queue_set());
 
   // In case we're keeping closure specialization stats, initialize those
   // counts and that mechanism.
   SpecializationStats::clear();
-
-  // Do later initialization work for concurrent refinement.
-  _cg1r->init();
 
   // Here we allocate the dummy full region that is required by the
   // G1AllocRegion class. If we don't pass an address in the reserved
   // space here, lots of asserts fire.
 
@@ -2311,11 +2314,12 @@
 void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl,
                                                  DirtyCardQueue* into_cset_dcq,
                                                  bool concurrent,
                                                  int worker_i) {
   // Clean cards in the hot card cache
-  concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set(), into_cset_dcq);
+  G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache();
+  hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq);
 
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   int n_completed_buffers = 0;
   while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
@@ -5601,12 +5605,15 @@
 
   // Should G1EvacuationFailureALot be in effect for this GC?
   NOT_PRODUCT(set_evacuation_failure_alot_for_current_gc();)
 
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
-  concurrent_g1_refine()->set_use_cache(false);
-  concurrent_g1_refine()->clear_hot_cache_claimed_index();
+
+  // Disable the hot card cache.
+  G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache();
+  hot_card_cache->reset_hot_cache_claimed_index();
+  hot_card_cache->set_use_cache(false);
 
   uint n_workers;
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     n_workers =
       AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
@@ -5684,12 +5691,15 @@
   }
 
   release_gc_alloc_regions(n_workers);
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
 
-  concurrent_g1_refine()->clear_hot_cache();
-  concurrent_g1_refine()->set_use_cache(true);
+  // Reset and re-enable the hot card cache.
+  // Note the counts for the cards in the regions in the
+  // collection set are reset when the collection set is freed.
+  hot_card_cache->reset_hot_cache();
+  hot_card_cache->set_use_cache(true);
 
   finalize_for_evac_failure();
 
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
@@ -5747,10 +5757,16 @@
                                      bool par) {
   assert(!hr->isHumongous(), "this is only for non-humongous regions");
   assert(!hr->is_empty(), "the region should not be empty");
   assert(free_list != NULL, "pre-condition");
 
+  // Clear the card counts for this region.
+  // Note: we only need to do this if the region is not young
+  // (since we don't refine cards in young regions).
+  if (!hr->is_young()) {
+    _cg1r->hot_card_cache()->reset_card_counts(hr);
+  }
   *pre_used += hr->used();
   hr->hr_clear(par, true /* clear_space */);
   free_list->add_as_head(hr);
 }
 
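
Taken together, the hunks above give the hot card cache a fixed lifecycle per collection: reset the claimed index and disable the cache before evacuating the collection set, drain any parked cards while iterating the dirty card queues, then reset and re-enable the cache afterwards, with per-region counts cleared as collection-set regions are freed. The following is a self-contained sketch of the underlying idea (defer refinement of repeatedly dirtied, "hot" cards by parking them in a small buffer and refining them later), using illustrative names such as HotCardCacheSketch, insert() and a hard-coded threshold; it is not the G1HotCardCache implementation this changeset introduces.

#include <cstddef>
#include <vector>

typedef signed char jbyte;   // card table entry type, as used in the diff

class HotCardCacheSketch {
  std::vector<jbyte*> _cache;   // small ring buffer of "hot" card pointers
  size_t              _next;    // next slot to overwrite
  bool                _use_cache;

 public:
  explicit HotCardCacheSketch(size_t slots)
      : _cache(slots, (jbyte*) NULL), _next(0), _use_cache(true) {}

  void set_use_cache(bool b) { _use_cache = b; }

  // Returns the card that should be refined right now: the card itself if the
  // cache is disabled or the card is not yet hot, otherwise an older card
  // evicted to make room. NULL means the card was absorbed by the cache.
  jbyte* insert(jbyte* card_ptr, int refinement_count) {
    const int hot_threshold = 4;           // illustrative, not a G1 flag
    if (!_use_cache || refinement_count < hot_threshold) {
      return card_ptr;
    }
    jbyte* evicted = _cache[_next];
    _cache[_next] = card_ptr;
    _next = (_next + 1) % _cache.size();
    return evicted;                        // NULL while the buffer is filling up
  }

  // Refine everything still parked in the cache; this is the role drain()
  // plays in iterate_dirty_card_closure() above.
  template <typename RefineFn>
  void drain(RefineFn refine) {
    for (size_t i = 0; i < _cache.size(); ++i) {
      if (_cache[i] != NULL) {
        refine(_cache[i]);
        _cache[i] = NULL;
      }
    }
  }

  // Forget all parked cards, e.g. after a full GC or an evacuation pause.
  void reset_hot_cache() {
    for (size_t i = 0; i < _cache.size(); ++i) {
      _cache[i] = NULL;
    }
    _next = 0;
  }
};

A caller would route each dirtied card through insert() and immediately refine whichever non-NULL pointer comes back, leaving anything still parked to be handled by a later drain() or discarded by reset_hot_cache() once the cached cards and their counts are no longer meaningful.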