Mercurial > hg > graal-jvmci-8
comparison src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @ 1710:94251661de76
6970376: ParNew: shared TaskQueue statistics
Reviewed-by: ysr
author | jcoomes |
---|---|
date | Mon, 09 Aug 2010 18:03:50 -0700 |
parents | a93a9eda13f7 |
children | 2d6b74c9a797 |
comparison
equal
deleted
inserted
replaced
1709:5f429ee79634 | 1710:94251661de76 |
---|---|
49 &_to_space_root_closure, gen_, &_old_gen_root_closure, | 49 &_to_space_root_closure, gen_, &_old_gen_root_closure, |
50 work_queue_set_, &term_), | 50 work_queue_set_, &term_), |
51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), | 51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), |
52 _keep_alive_closure(&_scan_weak_ref_closure), | 52 _keep_alive_closure(&_scan_weak_ref_closure), |
53 _promotion_failure_size(0), | 53 _promotion_failure_size(0), |
54 _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0), | |
55 _strong_roots_time(0.0), _term_time(0.0) | 54 _strong_roots_time(0.0), _term_time(0.0) |
56 { | 55 { |
56 #if TASKQUEUE_STATS | |
57 _term_attempts = 0; | |
58 _overflow_refills = 0; | |
59 _overflow_refill_objs = 0; | |
60 #endif // TASKQUEUE_STATS | |
61 | |
57 _survivor_chunk_array = | 62 _survivor_chunk_array = |
58 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); | 63 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); |
59 _hash_seed = 17; // Might want to take time-based random value. | 64 _hash_seed = 17; // Might want to take time-based random value. |
60 _start = os::elapsedTime(); | 65 _start = os::elapsedTime(); |
61 _old_gen_closure.set_generation(old_gen_); | 66 _old_gen_closure.set_generation(old_gen_); |
98 end = start + ParGCArrayScanChunk; | 103 end = start + ParGCArrayScanChunk; |
99 arrayOop(old)->set_length(end); | 104 arrayOop(old)->set_length(end); |
100 // Push remainder. | 105 // Push remainder. |
101 bool ok = work_queue()->push(old); | 106 bool ok = work_queue()->push(old); |
102 assert(ok, "just popped, push must be okay"); | 107 assert(ok, "just popped, push must be okay"); |
103 note_push(); | |
104 } else { | 108 } else { |
105 // Restore length so that it can be used if there | 109 // Restore length so that it can be used if there |
106 // is a promotion failure and forwarding pointers | 110 // is a promotion failure and forwarding pointers |
107 // must be removed. | 111 // must be removed. |
108 arrayOop(old)->set_length(end); | 112 arrayOop(old)->set_length(end); |
124 ObjToScanQueue* queue = work_queue(); | 128 ObjToScanQueue* queue = work_queue(); |
125 do { | 129 do { |
126 while (queue->size() > (juint)max_size) { | 130 while (queue->size() > (juint)max_size) { |
127 oop obj_to_scan; | 131 oop obj_to_scan; |
128 if (queue->pop_local(obj_to_scan)) { | 132 if (queue->pop_local(obj_to_scan)) { |
129 note_pop(); | |
130 if ((HeapWord *)obj_to_scan < young_old_boundary()) { | 133 if ((HeapWord *)obj_to_scan < young_old_boundary()) { |
131 if (obj_to_scan->is_objArray() && | 134 if (obj_to_scan->is_objArray() && |
132 obj_to_scan->is_forwarded() && | 135 obj_to_scan->is_forwarded() && |
133 obj_to_scan->forwardee() != obj_to_scan) { | 136 obj_to_scan->forwardee() != obj_to_scan) { |
134 scan_partial_array_and_push_remainder(obj_to_scan); | 137 scan_partial_array_and_push_remainder(obj_to_scan); |
269 Generation& old_gen, | 272 Generation& old_gen, |
270 ObjToScanQueueSet& queue_set, | 273 ObjToScanQueueSet& queue_set, |
271 GrowableArray<oop>** overflow_stacks_, | 274 GrowableArray<oop>** overflow_stacks_, |
272 size_t desired_plab_sz, | 275 size_t desired_plab_sz, |
273 ParallelTaskTerminator& term); | 276 ParallelTaskTerminator& term); |
277 | |
278 ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); } | |
279 | |
274 inline ParScanThreadState& thread_state(int i); | 280 inline ParScanThreadState& thread_state(int i); |
275 int pushes() { return _pushes; } | 281 |
276 int pops() { return _pops; } | |
277 int steals() { return _steals; } | |
278 void reset(bool promotion_failed); | 282 void reset(bool promotion_failed); |
279 void flush(); | 283 void flush(); |
284 | |
285 #if TASKQUEUE_STATS | |
286 static void | |
287 print_termination_stats_hdr(outputStream* const st = gclog_or_tty); | |
288 void print_termination_stats(outputStream* const st = gclog_or_tty); | |
289 static void | |
290 print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty); | |
291 void print_taskqueue_stats(outputStream* const st = gclog_or_tty); | |
292 void reset_stats(); | |
293 #endif // TASKQUEUE_STATS | |
294 | |
280 private: | 295 private: |
281 ParallelTaskTerminator& _term; | 296 ParallelTaskTerminator& _term; |
282 ParNewGeneration& _gen; | 297 ParNewGeneration& _gen; |
283 Generation& _next_gen; | 298 Generation& _next_gen; |
284 // staticstics | |
285 int _pushes; | |
286 int _pops; | |
287 int _steals; | |
288 }; | 299 }; |
289 | 300 |
290 | 301 |
291 ParScanThreadStateSet::ParScanThreadStateSet( | 302 ParScanThreadStateSet::ParScanThreadStateSet( |
292 int num_threads, Space& to_space, ParNewGeneration& gen, | 303 int num_threads, Space& to_space, ParNewGeneration& gen, |
293 Generation& old_gen, ObjToScanQueueSet& queue_set, | 304 Generation& old_gen, ObjToScanQueueSet& queue_set, |
294 GrowableArray<oop>** overflow_stack_set_, | 305 GrowableArray<oop>** overflow_stack_set_, |
295 size_t desired_plab_sz, ParallelTaskTerminator& term) | 306 size_t desired_plab_sz, ParallelTaskTerminator& term) |
296 : ResourceArray(sizeof(ParScanThreadState), num_threads), | 307 : ResourceArray(sizeof(ParScanThreadState), num_threads), |
297 _gen(gen), _next_gen(old_gen), _term(term), | 308 _gen(gen), _next_gen(old_gen), _term(term) |
298 _pushes(0), _pops(0), _steals(0) | |
299 { | 309 { |
300 assert(num_threads > 0, "sanity check!"); | 310 assert(num_threads > 0, "sanity check!"); |
301 // Initialize states. | 311 // Initialize states. |
302 for (int i = 0; i < num_threads; ++i) { | 312 for (int i = 0; i < num_threads; ++i) { |
303 new ((ParScanThreadState*)_data + i) | 313 new ((ParScanThreadState*)_data + i) |
321 thread_state(i).print_and_clear_promotion_failure_size(); | 331 thread_state(i).print_and_clear_promotion_failure_size(); |
322 } | 332 } |
323 } | 333 } |
324 } | 334 } |
325 | 335 |
336 #if TASKQUEUE_STATS | |
337 void | |
338 ParScanThreadState::reset_stats() | |
339 { | |
340 taskqueue_stats().reset(); | |
341 _term_attempts = 0; | |
342 _overflow_refills = 0; | |
343 _overflow_refill_objs = 0; | |
344 } | |
345 | |
346 void ParScanThreadStateSet::reset_stats() | |
347 { | |
348 for (int i = 0; i < length(); ++i) { | |
349 thread_state(i).reset_stats(); | |
350 } | |
351 } | |
352 | |
353 void | |
354 ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st) | |
355 { | |
356 st->print_raw_cr("GC Termination Stats"); | |
357 st->print_raw_cr(" elapsed --strong roots-- " | |
358 "-------termination-------"); | |
359 st->print_raw_cr("thr ms ms % " | |
360 " ms % attempts"); | |
361 st->print_raw_cr("--- --------- --------- ------ " | |
362 "--------- ------ --------"); | |
363 } | |
364 | |
365 void ParScanThreadStateSet::print_termination_stats(outputStream* const st) | |
366 { | |
367 print_termination_stats_hdr(st); | |
368 | |
369 for (int i = 0; i < length(); ++i) { | |
370 const ParScanThreadState & pss = thread_state(i); | |
371 const double elapsed_ms = pss.elapsed_time() * 1000.0; | |
372 const double s_roots_ms = pss.strong_roots_time() * 1000.0; | |
373 const double term_ms = pss.term_time() * 1000.0; | |
374 st->print_cr("%3d %9.2f %9.2f %6.2f " | |
375 "%9.2f %6.2f " SIZE_FORMAT_W(8), | |
376 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms, | |
377 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts()); | |
378 } | |
379 } | |
380 | |
381 // Print stats related to work queue activity. | |
382 void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st) | |
383 { | |
384 st->print_raw_cr("GC Task Stats"); | |
385 st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr(); | |
386 st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr(); | |
387 } | |
388 | |
389 void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st) | |
390 { | |
391 print_taskqueue_stats_hdr(st); | |
392 | |
393 TaskQueueStats totals; | |
394 for (int i = 0; i < length(); ++i) { | |
395 const ParScanThreadState & pss = thread_state(i); | |
396 const TaskQueueStats & stats = pss.taskqueue_stats(); | |
397 st->print("%3d ", i); stats.print(st); st->cr(); | |
398 totals += stats; | |
399 | |
400 if (pss.overflow_refills() > 0) { | |
401 st->print_cr(" " SIZE_FORMAT_W(10) " overflow refills " | |
402 SIZE_FORMAT_W(10) " overflow objects", | |
403 pss.overflow_refills(), pss.overflow_refill_objs()); | |
404 } | |
405 } | |
406 st->print("tot "); totals.print(st); st->cr(); | |
407 | |
408 DEBUG_ONLY(totals.verify()); | |
409 } | |
410 #endif // TASKQUEUE_STATS | |
411 | |
326 void ParScanThreadStateSet::flush() | 412 void ParScanThreadStateSet::flush() |
327 { | 413 { |
328 // Work in this loop should be kept as lightweight as | 414 // Work in this loop should be kept as lightweight as |
329 // possible since this might otherwise become a bottleneck | 415 // possible since this might otherwise become a bottleneck |
330 // to scaling. Should we add heavy-weight work into this | 416 // to scaling. Should we add heavy-weight work into this |
344 _gen.age_table()->merge(local_table); | 430 _gen.age_table()->merge(local_table); |
345 | 431 |
346 // Inform old gen that we're done. | 432 // Inform old gen that we're done. |
347 _next_gen.par_promote_alloc_done(i); | 433 _next_gen.par_promote_alloc_done(i); |
348 _next_gen.par_oop_since_save_marks_iterate_done(i); | 434 _next_gen.par_oop_since_save_marks_iterate_done(i); |
349 | 435 } |
350 // Flush stats related to work queue activity (push/pop/steal) | 436 |
351 // This could conceivably become a bottleneck; if so, we'll put the | |
352 // stat's gathering under the flag. | |
353 if (PAR_STATS_ENABLED) { | |
354 _pushes += par_scan_state.pushes(); | |
355 _pops += par_scan_state.pops(); | |
356 _steals += par_scan_state.steals(); | |
357 if (ParallelGCVerbose) { | |
358 gclog_or_tty->print("Thread %d complete:\n" | |
359 " Pushes: %7d Pops: %7d Steals %7d (in %d attempts)\n", | |
360 i, par_scan_state.pushes(), par_scan_state.pops(), | |
361 par_scan_state.steals(), par_scan_state.steal_attempts()); | |
362 if (par_scan_state.overflow_pushes() > 0 || | |
363 par_scan_state.overflow_refills() > 0) { | |
364 gclog_or_tty->print(" Overflow pushes: %7d " | |
365 "Overflow refills: %7d for %d objs.\n", | |
366 par_scan_state.overflow_pushes(), | |
367 par_scan_state.overflow_refills(), | |
368 par_scan_state.overflow_refill_objs()); | |
369 } | |
370 | |
371 double elapsed = par_scan_state.elapsed(); | |
372 double strong_roots = par_scan_state.strong_roots_time(); | |
373 double term = par_scan_state.term_time(); | |
374 gclog_or_tty->print( | |
375 " Elapsed: %7.2f ms.\n" | |
376 " Strong roots: %7.2f ms (%6.2f%%)\n" | |
377 " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n", | |
378 elapsed * 1000.0, | |
379 strong_roots * 1000.0, (strong_roots*100.0/elapsed), | |
380 term * 1000.0, (term*100.0/elapsed), | |
381 par_scan_state.term_attempts()); | |
382 } | |
383 } | |
384 } | |
385 if (UseConcMarkSweepGC && ParallelGCThreads > 0) { | 437 if (UseConcMarkSweepGC && ParallelGCThreads > 0) { |
386 // We need to call this even when ResizeOldPLAB is disabled | 438 // We need to call this even when ResizeOldPLAB is disabled |
387 // so as to avoid breaking some asserts. While we may be able | 439 // so as to avoid breaking some asserts. While we may be able |
388 // to avoid this by reorganizing the code a bit, I am loathe | 440 // to avoid this by reorganizing the code a bit, I am loathe |
389 // to do that unless we find cases where ergo leads to bad | 441 // to do that unless we find cases where ergo leads to bad |
454 par_scan_state()->trim_queues(0); | 506 par_scan_state()->trim_queues(0); |
455 | 507 |
456 // We have no local work, attempt to steal from other threads. | 508 // We have no local work, attempt to steal from other threads. |
457 | 509 |
458 // attempt to steal work from promoted. | 510 // attempt to steal work from promoted. |
459 par_scan_state()->note_steal_attempt(); | |
460 if (task_queues()->steal(par_scan_state()->thread_num(), | 511 if (task_queues()->steal(par_scan_state()->thread_num(), |
461 par_scan_state()->hash_seed(), | 512 par_scan_state()->hash_seed(), |
462 obj_to_scan)) { | 513 obj_to_scan)) { |
463 par_scan_state()->note_steal(); | |
464 bool res = work_q->push(obj_to_scan); | 514 bool res = work_q->push(obj_to_scan); |
465 assert(res, "Empty queue should have room for a push."); | 515 assert(res, "Empty queue should have room for a push."); |
466 | 516 |
467 par_scan_state()->note_push(); | |
468 // if successful, goto Start. | 517 // if successful, goto Start. |
469 continue; | 518 continue; |
470 | 519 |
471 // try global overflow list. | 520 // try global overflow list. |
472 } else if (par_gen()->take_from_overflow_list(par_scan_state())) { | 521 } else if (par_gen()->take_from_overflow_list(par_scan_state())) { |
839 } else { | 888 } else { |
840 GenCollectedHeap::StrongRootsScope srs(gch); | 889 GenCollectedHeap::StrongRootsScope srs(gch); |
841 tsk.work(0); | 890 tsk.work(0); |
842 } | 891 } |
843 thread_state_set.reset(promotion_failed()); | 892 thread_state_set.reset(promotion_failed()); |
844 | |
845 if (PAR_STATS_ENABLED && ParallelGCVerbose) { | |
846 gclog_or_tty->print("Thread totals:\n" | |
847 " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n", | |
848 thread_state_set.pushes(), thread_state_set.pops(), | |
849 thread_state_set.steals(), | |
850 thread_state_set.pops()+thread_state_set.steals()); | |
851 } | |
852 assert(thread_state_set.pushes() == thread_state_set.pops() | |
853 + thread_state_set.steals(), | |
854 "Or else the queues are leaky."); | |
855 | 893 |
856 // Process (weak) reference objects found during scavenge. | 894 // Process (weak) reference objects found during scavenge. |
857 ReferenceProcessor* rp = ref_processor(); | 895 ReferenceProcessor* rp = ref_processor(); |
858 IsAliveClosure is_alive(this); | 896 IsAliveClosure is_alive(this); |
859 ScanWeakRefClosure scan_weak_ref(this); | 897 ScanWeakRefClosure scan_weak_ref(this); |
930 | 968 |
931 if (PrintGC && !PrintGCDetails) { | 969 if (PrintGC && !PrintGCDetails) { |
932 gch->print_heap_change(gch_prev_used); | 970 gch->print_heap_change(gch_prev_used); |
933 } | 971 } |
934 | 972 |
973 TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats()); | |
974 TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats()); | |
975 | |
935 if (UseAdaptiveSizePolicy) { | 976 if (UseAdaptiveSizePolicy) { |
936 size_policy->minor_collection_end(gch->gc_cause()); | 977 size_policy->minor_collection_end(gch->gc_cause()); |
937 size_policy->avg_survived()->sample(from()->used()); | 978 size_policy->avg_survived()->sample(from()->used()); |
938 } | 979 } |
939 | 980 |
1102 // Add stats for overflow pushes. | 1143 // Add stats for overflow pushes. |
1103 if (Verbose && PrintGCDetails) { | 1144 if (Verbose && PrintGCDetails) { |
1104 gclog_or_tty->print("queue overflow!\n"); | 1145 gclog_or_tty->print("queue overflow!\n"); |
1105 } | 1146 } |
1106 push_on_overflow_list(old, par_scan_state); | 1147 push_on_overflow_list(old, par_scan_state); |
1107 par_scan_state->note_overflow_push(); | 1148 TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); |
1108 } | 1149 } |
1109 par_scan_state->note_push(); | |
1110 | 1150 |
1111 return new_obj; | 1151 return new_obj; |
1112 } | 1152 } |
1113 | 1153 |
1114 // Oops. Someone beat us to it. Undo the allocation. Where did we | 1154 // Oops. Someone beat us to it. Undo the allocation. Where did we |
1225 } | 1265 } |
1226 ) | 1266 ) |
1227 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { | 1267 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { |
1228 // Add stats for overflow pushes. | 1268 // Add stats for overflow pushes. |
1229 push_on_overflow_list(old, par_scan_state); | 1269 push_on_overflow_list(old, par_scan_state); |
1230 par_scan_state->note_overflow_push(); | 1270 TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0)); |
1231 } | 1271 } |
1232 par_scan_state->note_push(); | |
1233 | 1272 |
1234 return new_obj; | 1273 return new_obj; |
1235 } | 1274 } |
1236 | 1275 |
1237 // Oops. Someone beat us to it. Undo the allocation. Where did we | 1276 // Oops. Someone beat us to it. Undo the allocation. Where did we |
1464 bool ok = work_q->push(obj_to_push); | 1503 bool ok = work_q->push(obj_to_push); |
1465 assert(ok, "Should have succeeded"); | 1504 assert(ok, "Should have succeeded"); |
1466 cur = next; | 1505 cur = next; |
1467 n++; | 1506 n++; |
1468 } | 1507 } |
1469 par_scan_state->note_overflow_refill(n); | 1508 TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n)); |
1470 #ifndef PRODUCT | 1509 #ifndef PRODUCT |
1471 assert(_num_par_pushes >= n, "Too many pops?"); | 1510 assert(_num_par_pushes >= n, "Too many pops?"); |
1472 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes); | 1511 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes); |
1473 #endif | 1512 #endif |
1474 return true; | 1513 return true; |