Mercurial > hg > graal-jvmci-8
comparison src/share/vm/gc_implementation/g1/g1RemSet.cpp @ 794:315a5d70b295
6484957: G1: parallel concurrent refinement
6826318: G1: remove traversal-based refinement code
Summary: Removed traversal-based refinement code as it's no longer used. Made the concurrent refinement (queue-based) parallel.
Reviewed-by: tonyp
author | iveresov |
---|---|
date | Mon, 11 May 2009 16:30:56 -0700 |
parents | 20c6f43950b5 |
children | 29e7d79232b9 |
comparison
equal
deleted
inserted
replaced
758:9b3a41ccc927 | 794:315a5d70b295 |
---|---|
103 int worker_i) { | 103 int worker_i) { |
104 IntoCSRegionClosure rc(_g1, oc); | 104 IntoCSRegionClosure rc(_g1, oc); |
105 _g1->heap_region_iterate(&rc); | 105 _g1->heap_region_iterate(&rc); |
106 } | 106 } |
107 | 107 |
108 class UpdateRSOutOfRegionClosure: public HeapRegionClosure { | |
109 G1CollectedHeap* _g1h; | |
110 ModRefBarrierSet* _mr_bs; | |
111 UpdateRSOopClosure _cl; | |
112 int _worker_i; | |
113 public: | |
114 UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : | |
115 _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i), | |
116 _mr_bs(g1->mr_bs()), | |
117 _worker_i(worker_i), | |
118 _g1h(g1) | |
119 {} | |
120 bool doHeapRegion(HeapRegion* r) { | |
121 if (!r->in_collection_set() && !r->continuesHumongous()) { | |
122 _cl.set_from(r); | |
123 r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind); | |
124 _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true); | |
125 } | |
126 return false; | |
127 } | |
128 }; | |
129 | |
130 class VerifyRSCleanCardOopClosure: public OopClosure { | 108 class VerifyRSCleanCardOopClosure: public OopClosure { |
131 G1CollectedHeap* _g1; | 109 G1CollectedHeap* _g1; |
132 public: | 110 public: |
133 VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {} | 111 VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {} |
134 | 112 |
348 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); | 326 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); |
349 | 327 |
350 double start = os::elapsedTime(); | 328 double start = os::elapsedTime(); |
351 _g1p->record_update_rs_start_time(worker_i, start * 1000.0); | 329 _g1p->record_update_rs_start_time(worker_i, start * 1000.0); |
352 | 330 |
353 if (G1RSBarrierUseQueue && !cg1r->do_traversal()) { | 331 // Apply the appropriate closure to all remaining log entries. |
354 // Apply the appropriate closure to all remaining log entries. | 332 _g1->iterate_dirty_card_closure(false, worker_i); |
355 _g1->iterate_dirty_card_closure(false, worker_i); | 333 // Now there should be no dirty cards. |
356 // Now there should be no dirty cards. | 334 if (G1RSLogCheckCardTable) { |
357 if (G1RSLogCheckCardTable) { | 335 CountNonCleanMemRegionClosure cl(_g1); |
358 CountNonCleanMemRegionClosure cl(_g1); | 336 _ct_bs->mod_card_iterate(&cl); |
359 _ct_bs->mod_card_iterate(&cl); | 337 // XXX This isn't true any more: keeping cards of young regions |
360 // XXX This isn't true any more: keeping cards of young regions | 338 // marked dirty broke it. Need some reasonable fix. |
361 // marked dirty broke it. Need some reasonable fix. | 339 guarantee(cl.n() == 0, "Card table should be clean."); |
362 guarantee(cl.n() == 0, "Card table should be clean."); | 340 } |
363 } | 341 |
364 } else { | |
365 UpdateRSOutOfRegionClosure update_rs(_g1, worker_i); | |
366 _g1->heap_region_iterate(&update_rs); | |
367 // We did a traversal; no further one is necessary. | |
368 if (G1RSBarrierUseQueue) { | |
369 assert(cg1r->do_traversal(), "Or we shouldn't have gotten here."); | |
370 cg1r->set_pya_cancel(); | |
371 } | |
372 if (_cg1r->use_cache()) { | |
373 _cg1r->clear_and_record_card_counts(); | |
374 _cg1r->clear_hot_cache(); | |
375 } | |
376 } | |
377 _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); | 342 _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); |
378 } | 343 } |
379 | 344 |
380 #ifndef PRODUCT | 345 #ifndef PRODUCT |
381 class PrintRSClosure : public HeapRegionClosure { | 346 class PrintRSClosure : public HeapRegionClosure { |
484 _g1p->record_scan_new_refs_time(worker_i, | 449 _g1p->record_scan_new_refs_time(worker_i, |
485 (os::elapsedTime() - scan_new_refs_start_sec) | 450 (os::elapsedTime() - scan_new_refs_start_sec) |
486 * 1000.0); | 451 * 1000.0); |
487 } | 452 } |
488 | 453 |
489 void HRInto_G1RemSet::set_par_traversal(bool b) { | |
490 _par_traversal_in_progress = b; | |
491 HeapRegionRemSet::set_par_traversal(b); | |
492 } | |
493 | |
494 void HRInto_G1RemSet::cleanupHRRS() { | 454 void HRInto_G1RemSet::cleanupHRRS() { |
495 HeapRegionRemSet::cleanup(); | 455 HeapRegionRemSet::cleanup(); |
496 } | 456 } |
497 | 457 |
498 void | 458 void |
525 // updating and scanning. See CRs 6677707 and 6677708. | 485 // updating and scanning. See CRs 6677707 and 6677708. |
526 if (G1ParallelRSetUpdatingEnabled || (worker_i == 0)) { | 486 if (G1ParallelRSetUpdatingEnabled || (worker_i == 0)) { |
527 updateRS(worker_i); | 487 updateRS(worker_i); |
528 scanNewRefsRS(oc, worker_i); | 488 scanNewRefsRS(oc, worker_i); |
529 } else { | 489 } else { |
530 _g1p->record_update_rs_start_time(worker_i, os::elapsedTime()); | 490 _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0); |
531 _g1p->record_update_rs_processed_buffers(worker_i, 0.0); | 491 _g1p->record_update_rs_processed_buffers(worker_i, 0.0); |
532 _g1p->record_update_rs_time(worker_i, 0.0); | 492 _g1p->record_update_rs_time(worker_i, 0.0); |
533 _g1p->record_scan_new_refs_time(worker_i, 0.0); | 493 _g1p->record_scan_new_refs_time(worker_i, 0.0); |
534 } | 494 } |
535 if (G1ParallelRSetScanningEnabled || (worker_i == 0)) { | 495 if (G1ParallelRSetScanningEnabled || (worker_i == 0)) { |
536 scanRS(oc, worker_i); | 496 scanRS(oc, worker_i); |
537 } else { | 497 } else { |
538 _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime()); | 498 _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0); |
539 _g1p->record_scan_rs_time(worker_i, 0.0); | 499 _g1p->record_scan_rs_time(worker_i, 0.0); |
540 } | 500 } |
541 } else { | 501 } else { |
542 assert(worker_i == 0, "invariant"); | 502 assert(worker_i == 0, "invariant"); |
543 updateRS(0); | 503 updateRS(0); |
560 | 520 |
561 assert(!_par_traversal_in_progress, "Invariant between iterations."); | 521 assert(!_par_traversal_in_progress, "Invariant between iterations."); |
562 if (ParallelGCThreads > 0) { | 522 if (ParallelGCThreads > 0) { |
563 set_par_traversal(true); | 523 set_par_traversal(true); |
564 _seq_task->set_par_threads((int)n_workers()); | 524 _seq_task->set_par_threads((int)n_workers()); |
565 if (cg1r->do_traversal()) { | |
566 updateRS(0); | |
567 // Have to do this again after updaters | |
568 cleanupHRRS(); | |
569 } | |
570 } | 525 } |
571 guarantee( _cards_scanned == NULL, "invariant" ); | 526 guarantee( _cards_scanned == NULL, "invariant" ); |
572 _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); | 527 _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); |
573 for (uint i = 0; i < n_workers(); ++i) { | 528 for (uint i = 0; i < n_workers(); ++i) { |
574 _cards_scanned[i] = 0; | 529 _cards_scanned[i] = 0; |
645 _g1->set_refine_cte_cl_concurrency(true); | 600 _g1->set_refine_cte_cl_concurrency(true); |
646 cleanUpIteratorsClosure iterClosure; | 601 cleanUpIteratorsClosure iterClosure; |
647 _g1->collection_set_iterate(&iterClosure); | 602 _g1->collection_set_iterate(&iterClosure); |
648 // Set all cards back to clean. | 603 // Set all cards back to clean. |
649 _g1->cleanUpCardTable(); | 604 _g1->cleanUpCardTable(); |
605 | |
650 if (ParallelGCThreads > 0) { | 606 if (ParallelGCThreads > 0) { |
651 ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine(); | |
652 if (cg1r->do_traversal()) { | |
653 cg1r->cg1rThread()->set_do_traversal(false); | |
654 } | |
655 set_par_traversal(false); | 607 set_par_traversal(false); |
656 } | 608 } |
657 | 609 |
658 if (_g1->evacuation_failed()) { | 610 if (_g1->evacuation_failed()) { |
659 // Restore remembered sets for the regions pointing into | 611 // Restore remembered sets for the regions pointing into |
719 ScrubRSClosure scrub_cl(region_bm, card_bm); | 671 ScrubRSClosure scrub_cl(region_bm, card_bm); |
720 _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val); | 672 _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val); |
721 } | 673 } |
722 | 674 |
723 | 675 |
724 class ConcRefineRegionClosure: public HeapRegionClosure { | |
725 G1CollectedHeap* _g1h; | |
726 CardTableModRefBS* _ctbs; | |
727 ConcurrentGCThread* _cgc_thrd; | |
728 ConcurrentG1Refine* _cg1r; | |
729 unsigned _cards_processed; | |
730 UpdateRSOopClosure _update_rs_oop_cl; | |
731 public: | |
732 ConcRefineRegionClosure(CardTableModRefBS* ctbs, | |
733 ConcurrentG1Refine* cg1r, | |
734 HRInto_G1RemSet* g1rs) : | |
735 _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()), | |
736 _update_rs_oop_cl(g1rs), _cards_processed(0), | |
737 _g1h(G1CollectedHeap::heap()) | |
738 {} | |
739 | |
740 bool doHeapRegion(HeapRegion* r) { | |
741 if (!r->in_collection_set() && | |
742 !r->continuesHumongous() && | |
743 !r->is_young()) { | |
744 _update_rs_oop_cl.set_from(r); | |
745 UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); | |
746 | |
747 // For each run of dirty card in the region: | |
748 // 1) Clear the cards. | |
749 // 2) Process the range corresponding to the run, adding any | |
750 // necessary RS entries. | |
751 // 1 must precede 2, so that a concurrent modification redirties the | |
752 // card. If a processing attempt does not succeed, because it runs | |
753 // into an unparseable region, we will do binary search to find the | |
754 // beginning of the next parseable region. | |
755 HeapWord* startAddr = r->bottom(); | |
756 HeapWord* endAddr = r->used_region().end(); | |
757 HeapWord* lastAddr; | |
758 HeapWord* nextAddr; | |
759 | |
760 for (nextAddr = lastAddr = startAddr; | |
761 nextAddr < endAddr; | |
762 nextAddr = lastAddr) { | |
763 MemRegion dirtyRegion; | |
764 | |
765 // Get and clear dirty region from card table | |
766 MemRegion next_mr(nextAddr, endAddr); | |
767 dirtyRegion = | |
768 _ctbs->dirty_card_range_after_reset( | |
769 next_mr, | |
770 true, CardTableModRefBS::clean_card_val()); | |
771 assert(dirtyRegion.start() >= nextAddr, | |
772 "returned region inconsistent?"); | |
773 | |
774 if (!dirtyRegion.is_empty()) { | |
775 HeapWord* stop_point = | |
776 r->object_iterate_mem_careful(dirtyRegion, | |
777 &update_rs_obj_cl); | |
778 if (stop_point == NULL) { | |
779 lastAddr = dirtyRegion.end(); | |
780 _cards_processed += | |
781 (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words); | |
782 } else { | |
783 // We're going to skip one or more cards that we can't parse. | |
784 HeapWord* next_parseable_card = | |
785 r->next_block_start_careful(stop_point); | |
786 // Round this up to a card boundary. | |
787 next_parseable_card = | |
788 _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card)); | |
789 // Now we invalidate the intervening cards so we'll see them | |
790 // again. | |
791 MemRegion remaining_dirty = | |
792 MemRegion(stop_point, dirtyRegion.end()); | |
793 MemRegion skipped = | |
794 MemRegion(stop_point, next_parseable_card); | |
795 _ctbs->invalidate(skipped.intersection(remaining_dirty)); | |
796 | |
797 // Now start up again where we can parse. | |
798 lastAddr = next_parseable_card; | |
799 | |
800 // Count how many we did completely. | |
801 _cards_processed += | |
802 (stop_point - dirtyRegion.start()) / | |
803 CardTableModRefBS::card_size_in_words; | |
804 } | |
805 // Allow interruption at regular intervals. | |
806 // (Might need to make them more regular, if we get big | |
807 // dirty regions.) | |
808 if (_cgc_thrd != NULL) { | |
809 if (_cgc_thrd->should_yield()) { | |
810 _cgc_thrd->yield(); | |
811 switch (_cg1r->get_pya()) { | |
812 case PYA_continue: | |
813 // This may have changed: re-read. | |
814 endAddr = r->used_region().end(); | |
815 continue; | |
816 case PYA_restart: case PYA_cancel: | |
817 return true; | |
818 } | |
819 } | |
820 } | |
821 } else { | |
822 break; | |
823 } | |
824 } | |
825 } | |
826 // A good yield opportunity. | |
827 if (_cgc_thrd != NULL) { | |
828 if (_cgc_thrd->should_yield()) { | |
829 _cgc_thrd->yield(); | |
830 switch (_cg1r->get_pya()) { | |
831 case PYA_restart: case PYA_cancel: | |
832 return true; | |
833 default: | |
834 break; | |
835 } | |
836 | |
837 } | |
838 } | |
839 return false; | |
840 } | |
841 | |
842 unsigned cards_processed() { return _cards_processed; } | |
843 }; | |
844 | |
845 | |
846 void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) { | |
847 ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this); | |
848 _g1->heap_region_iterate(&cr_cl); | |
849 _conc_refine_traversals++; | |
850 _conc_refine_cards += cr_cl.cards_processed(); | |
851 } | |
852 | |
853 static IntHistogram out_of_histo(50, 50); | 676 static IntHistogram out_of_histo(50, 50); |
854 | |
855 | |
856 | 677 |
857 void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { | 678 void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { |
858 // If the card is no longer dirty, nothing to do. | 679 // If the card is no longer dirty, nothing to do. |
859 if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; | 680 if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; |
860 | 681 |
981 size_t max_mem_sz() { return _max_mem_sz; } | 802 size_t max_mem_sz() { return _max_mem_sz; } |
982 size_t occupied() { return _occupied; } | 803 size_t occupied() { return _occupied; } |
983 HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } | 804 HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; } |
984 }; | 805 }; |
985 | 806 |
807 class PrintRSThreadVTimeClosure : public ThreadClosure { | |
808 public: | |
809 virtual void do_thread(Thread *t) { | |
810 ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t; | |
811 gclog_or_tty->print(" %5.2f", crt->vtime_accum()); | |
812 } | |
813 }; | |
814 | |
986 void HRInto_G1RemSet::print_summary_info() { | 815 void HRInto_G1RemSet::print_summary_info() { |
987 G1CollectedHeap* g1 = G1CollectedHeap::heap(); | 816 G1CollectedHeap* g1 = G1CollectedHeap::heap(); |
988 ConcurrentG1RefineThread* cg1r_thrd = | |
989 g1->concurrent_g1_refine()->cg1rThread(); | |
990 | 817 |
991 #if CARD_REPEAT_HISTO | 818 #if CARD_REPEAT_HISTO |
992 gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); | 819 gclog_or_tty->print_cr("\nG1 card_repeat count histogram: "); |
993 gclog_or_tty->print_cr(" # of repeats --> # of cards with that number."); | 820 gclog_or_tty->print_cr(" # of repeats --> # of cards with that number."); |
994 card_repeat_count.print_on(gclog_or_tty); | 821 card_repeat_count.print_on(gclog_or_tty); |
997 if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) { | 824 if (FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT) { |
998 gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: "); | 825 gclog_or_tty->print_cr("\nG1 rem-set out-of-region histogram: "); |
999 gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number."); | 826 gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number."); |
1000 out_of_histo.print_on(gclog_or_tty); | 827 out_of_histo.print_on(gclog_or_tty); |
1001 } | 828 } |
1002 gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in " | 829 gclog_or_tty->print_cr("\n Concurrent RS processed %d cards", |
1003 "%5.2fs.", | 830 _conc_refine_cards); |
1004 _conc_refine_cards, cg1r_thrd->vtime_accum()); | |
1005 | |
1006 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); | 831 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); |
1007 jint tot_processed_buffers = | 832 jint tot_processed_buffers = |
1008 dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); | 833 dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread(); |
1009 gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); | 834 gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers); |
1010 gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.", | 835 gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.", |
1011 dcqs.processed_buffers_rs_thread(), | 836 dcqs.processed_buffers_rs_thread(), |
1012 100.0*(float)dcqs.processed_buffers_rs_thread()/ | 837 100.0*(float)dcqs.processed_buffers_rs_thread()/ |
1013 (float)tot_processed_buffers); | 838 (float)tot_processed_buffers); |
1014 gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.", | 839 gclog_or_tty->print_cr(" %8d (%5.1f%%) by mutator threads.", |
1015 dcqs.processed_buffers_mut(), | 840 dcqs.processed_buffers_mut(), |
1016 100.0*(float)dcqs.processed_buffers_mut()/ | 841 100.0*(float)dcqs.processed_buffers_mut()/ |
1017 (float)tot_processed_buffers); | 842 (float)tot_processed_buffers); |
1018 gclog_or_tty->print_cr(" Did %d concurrent refinement traversals.", | 843 gclog_or_tty->print_cr(" Conc RS threads times(s)"); |
1019 _conc_refine_traversals); | 844 PrintRSThreadVTimeClosure p; |
1020 if (!G1RSBarrierUseQueue) { | 845 gclog_or_tty->print(" "); |
1021 gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.", | 846 g1->concurrent_g1_refine()->threads_do(&p); |
1022 _conc_refine_traversals > 0 ? | |
1023 (float)_conc_refine_cards/(float)_conc_refine_traversals : | |
1024 0); | |
1025 } | |
1026 gclog_or_tty->print_cr(""); | 847 gclog_or_tty->print_cr(""); |
848 | |
1027 if (G1UseHRIntoRS) { | 849 if (G1UseHRIntoRS) { |
1028 HRRSStatsIter blk; | 850 HRRSStatsIter blk; |
1029 g1->heap_region_iterate(&blk); | 851 g1->heap_region_iterate(&blk); |
1030 gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K." | 852 gclog_or_tty->print_cr(" Total heap region rem set sizes = " SIZE_FORMAT "K." |
1031 " Max = " SIZE_FORMAT "K.", | 853 " Max = " SIZE_FORMAT "K.", |