comparison src/share/vm/gc_implementation/g1/concurrentMark.cpp @ 3316:cd8e33b2a8ad

7034139: G1: assert(Thread::current()->is_ConcurrentGC_thread()) failed: only a conc GC thread can call this. Summary: We were calling STS join and leave during a STW pause and we are not supposed to. I now only call those during the concurrent phase. I also added stress code in the non-product builds to force overflows (the condition that was uncovering the bug) to make sure it does not happen again. Reviewed-by: johnc, brutisso
author tonyp
date Fri, 29 Apr 2011 12:40:49 -0400
parents 8f1042ff784d
children 063382f9b575
comparison
equal deleted inserted replaced
3315:da0fffdcc453 3316:cd8e33b2a8ad
824 }; 824 };
825 825
826 void ConcurrentMark::checkpointRootsInitialPost() { 826 void ConcurrentMark::checkpointRootsInitialPost() {
827 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 827 G1CollectedHeap* g1h = G1CollectedHeap::heap();
828 828
829 // If we force an overflow during remark, the remark operation will
830 // actually abort and we'll restart concurrent marking. If we always
831 force an overflow during remark we'll never actually complete the
832 marking phase. So, we initialize this here, at the start of the
833 cycle, so that the remaining overflow number will decrease at
834 // every remark and we'll eventually not need to cause one.
835 force_overflow_stw()->init();
836
829 // For each region note start of marking. 837 // For each region note start of marking.
830 NoteStartOfMarkHRClosure startcl; 838 NoteStartOfMarkHRClosure startcl;
831 g1h->heap_region_iterate(&startcl); 839 g1h->heap_region_iterate(&startcl);
832 840
833 // Start weak-reference discovery. 841 // Start weak-reference discovery.
891 899
892 g1p->record_concurrent_mark_init_end(); 900 g1p->record_concurrent_mark_init_end();
893 } 901 }
894 902
895 /* 903 /*
896 Notice that in the next two methods, we actually leave the STS 904 * Notice that in the next two methods, we actually leave the STS
897 during the barrier sync and join it immediately afterwards. If we 905 * during the barrier sync and join it immediately afterwards. If we
898 do not do this, this then the following deadlock can occur: one 906 * do not do this, the following deadlock can occur: one thread could
899 thread could be in the barrier sync code, waiting for the other 907 * be in the barrier sync code, waiting for the other thread to also
900 thread to also sync up, whereas another one could be trying to 908 * sync up, whereas another one could be trying to yield, while also
901 yield, while also waiting for the other threads to sync up too. 909 * waiting for the other threads to sync up too.
902 910 *
903 Because the thread that does the sync barrier has left the STS, it 911 * Note, however, that this code is also used during remark and in
904 is possible to be suspended for a Full GC or an evacuation pause 912 * this case we should not attempt to leave / enter the STS, otherwise
905 could occur. This is actually safe, since the entering the sync 913 * we'll either hit an assert (debug / fastdebug) or deadlock
906 barrier is one of the last things do_marking_step() does, and it 914 * (product). So we should only leave / enter the STS if we are
907 doesn't manipulate any data structures afterwards. 915 * operating concurrently.
908 */ 916 *
917 * Because the thread that does the sync barrier has left the STS, it
918 * is possible to be suspended for a Full GC or an evacuation pause
919 * could occur. This is actually safe, since the entering the sync
920 * barrier is one of the last things do_marking_step() does, and it
921 * doesn't manipulate any data structures afterwards.
922 */
909 923
910 void ConcurrentMark::enter_first_sync_barrier(int task_num) { 924 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
911 if (verbose_low()) 925 if (verbose_low())
912 gclog_or_tty->print_cr("[%d] entering first barrier", task_num); 926 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
913 927
914 ConcurrentGCThread::stsLeave(); 928 if (concurrent()) {
929 ConcurrentGCThread::stsLeave();
930 }
915 _first_overflow_barrier_sync.enter(); 931 _first_overflow_barrier_sync.enter();
916 ConcurrentGCThread::stsJoin(); 932 if (concurrent()) {
933 ConcurrentGCThread::stsJoin();
934 }
917 // at this point everyone should have synced up and not be doing any 935 // at this point everyone should have synced up and not be doing any
918 // more work 936 // more work
919 937
920 if (verbose_low()) 938 if (verbose_low())
921 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); 939 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
922 940
923 // let task 0 do this 941 // let task 0 do this
924 if (task_num == 0) { 942 if (task_num == 0) {
925 // task 0 is responsible for clearing the global data structures 943 // task 0 is responsible for clearing the global data structures
926 clear_marking_state(); 944 // We should be here because of an overflow. During STW we should
945 // not clear the overflow flag since we rely on it being true when
946 // we exit this method to abort the pause and restart concurent
947 // marking.
948 clear_marking_state(concurrent() /* clear_overflow */);
949 force_overflow()->update();
927 950
928 if (PrintGC) { 951 if (PrintGC) {
929 gclog_or_tty->date_stamp(PrintGCDateStamps); 952 gclog_or_tty->date_stamp(PrintGCDateStamps);
930 gclog_or_tty->stamp(PrintGCTimeStamps); 953 gclog_or_tty->stamp(PrintGCTimeStamps);
931 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 954 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
938 961
939 void ConcurrentMark::enter_second_sync_barrier(int task_num) { 962 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
940 if (verbose_low()) 963 if (verbose_low())
941 gclog_or_tty->print_cr("[%d] entering second barrier", task_num); 964 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
942 965
943 ConcurrentGCThread::stsLeave(); 966 if (concurrent()) {
967 ConcurrentGCThread::stsLeave();
968 }
944 _second_overflow_barrier_sync.enter(); 969 _second_overflow_barrier_sync.enter();
945 ConcurrentGCThread::stsJoin(); 970 if (concurrent()) {
971 ConcurrentGCThread::stsJoin();
972 }
946 // at this point everything should be re-initialised and ready to go 973 // at this point everything should be re-initialised and ready to go
947 974
948 if (verbose_low()) 975 if (verbose_low())
949 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); 976 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
950 } 977 }
978
979 #ifndef PRODUCT
980 void ForceOverflowSettings::init() {
981 _num_remaining = G1ConcMarkForceOverflow;
982 _force = false;
983 update();
984 }
985
986 void ForceOverflowSettings::update() {
987 if (_num_remaining > 0) {
988 _num_remaining -= 1;
989 _force = true;
990 } else {
991 _force = false;
992 }
993 }
994
995 bool ForceOverflowSettings::should_force() {
996 if (_force) {
997 _force = false;
998 return true;
999 } else {
1000 return false;
1001 }
1002 }
1003 #endif // !PRODUCT
951 1004
952 void ConcurrentMark::grayRoot(oop p) { 1005 void ConcurrentMark::grayRoot(oop p) {
953 HeapWord* addr = (HeapWord*) p; 1006 HeapWord* addr = (HeapWord*) p;
954 // We can't really check against _heap_start and _heap_end, since it 1007 // We can't really check against _heap_start and _heap_end, since it
955 // is possible during an evacuation pause with piggy-backed 1008 // is possible during an evacuation pause with piggy-backed
1115 // stop-the-world GC happens even as we mark in this generation. 1168 // stop-the-world GC happens even as we mark in this generation.
1116 1169
1117 _restart_for_overflow = false; 1170 _restart_for_overflow = false;
1118 1171
1119 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); 1172 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1173 force_overflow_conc()->init();
1120 set_phase(active_workers, true /* concurrent */); 1174 set_phase(active_workers, true /* concurrent */);
1121 1175
1122 CMConcurrentMarkingTask markingTask(this, cmThread()); 1176 CMConcurrentMarkingTask markingTask(this, cmThread());
1123 if (parallel_marking_threads() > 0) 1177 if (parallel_marking_threads() > 0)
1124 _parallel_workers->run_task(&markingTask); 1178 _parallel_workers->run_task(&markingTask);
2701 assert(_should_gray_objects, "invariant"); 2755 assert(_should_gray_objects, "invariant");
2702 } 2756 }
2703 2757
2704 } 2758 }
2705 2759
2706 void ConcurrentMark::clear_marking_state() { 2760 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2707 _markStack.setEmpty(); 2761 _markStack.setEmpty();
2708 _markStack.clear_overflow(); 2762 _markStack.clear_overflow();
2709 _regionStack.setEmpty(); 2763 _regionStack.setEmpty();
2710 _regionStack.clear_overflow(); 2764 _regionStack.clear_overflow();
2711 clear_has_overflown(); 2765 if (clear_overflow) {
2766 clear_has_overflown();
2767 } else {
2768 assert(has_overflown(), "pre-condition");
2769 }
2712 _finger = _heap_start; 2770 _finger = _heap_start;
2713 2771
2714 for (int i = 0; i < (int)_max_task_num; ++i) { 2772 for (int i = 0; i < (int)_max_task_num; ++i) {
2715 OopTaskQueue* queue = _task_queues->queue(i); 2773 OopTaskQueue* queue = _task_queues->queue(i);
2716 queue->set_empty(); 2774 queue->set_empty();
4277 break; 4335 break;
4278 } 4336 }
4279 } 4337 }
4280 } 4338 }
4281 4339
4340 // If we are about to wrap up and go into termination, check if we
4341 // should raise the overflow flag.
4342 if (do_termination && !has_aborted()) {
4343 if (_cm->force_overflow()->should_force()) {
4344 _cm->set_has_overflown();
4345 regular_clock_call();
4346 }
4347 }
4348
4282 // We still haven't aborted. Now, let's try to get into the 4349 // We still haven't aborted. Now, let's try to get into the
4283 // termination protocol. 4350 // termination protocol.
4284 if (do_termination && !has_aborted()) { 4351 if (do_termination && !has_aborted()) {
4285 // We cannot check whether the global stack is empty, since other 4352 // We cannot check whether the global stack is empty, since other
4286 // tasks might be concurrently pushing objects on it. We also cannot 4353 // tasks might be concurrently pushing objects on it. We also cannot