comparison src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @ 1710:94251661de76

6970376: ParNew: shared TaskQueue statistics Reviewed-by: ysr
author jcoomes
date Mon, 09 Aug 2010 18:03:50 -0700
parents a93a9eda13f7
children 2d6b74c9a797
comparison
equal deleted inserted replaced
1709:5f429ee79634 1710:94251661de76
49 &_to_space_root_closure, gen_, &_old_gen_root_closure, 49 &_to_space_root_closure, gen_, &_old_gen_root_closure,
50 work_queue_set_, &term_), 50 work_queue_set_, &term_),
51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), 51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
52 _keep_alive_closure(&_scan_weak_ref_closure), 52 _keep_alive_closure(&_scan_weak_ref_closure),
53 _promotion_failure_size(0), 53 _promotion_failure_size(0),
54 _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
55 _strong_roots_time(0.0), _term_time(0.0) 54 _strong_roots_time(0.0), _term_time(0.0)
56 { 55 {
56 #if TASKQUEUE_STATS
57 _term_attempts = 0;
58 _overflow_refills = 0;
59 _overflow_refill_objs = 0;
60 #endif // TASKQUEUE_STATS
61
57 _survivor_chunk_array = 62 _survivor_chunk_array =
58 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); 63 (ChunkArray*) old_gen()->get_data_recorder(thread_num());
59 _hash_seed = 17; // Might want to take time-based random value. 64 _hash_seed = 17; // Might want to take time-based random value.
60 _start = os::elapsedTime(); 65 _start = os::elapsedTime();
61 _old_gen_closure.set_generation(old_gen_); 66 _old_gen_closure.set_generation(old_gen_);
98 end = start + ParGCArrayScanChunk; 103 end = start + ParGCArrayScanChunk;
99 arrayOop(old)->set_length(end); 104 arrayOop(old)->set_length(end);
100 // Push remainder. 105 // Push remainder.
101 bool ok = work_queue()->push(old); 106 bool ok = work_queue()->push(old);
102 assert(ok, "just popped, push must be okay"); 107 assert(ok, "just popped, push must be okay");
103 note_push();
104 } else { 108 } else {
105 // Restore length so that it can be used if there 109 // Restore length so that it can be used if there
106 // is a promotion failure and forwarding pointers 110 // is a promotion failure and forwarding pointers
107 // must be removed. 111 // must be removed.
108 arrayOop(old)->set_length(end); 112 arrayOop(old)->set_length(end);
124 ObjToScanQueue* queue = work_queue(); 128 ObjToScanQueue* queue = work_queue();
125 do { 129 do {
126 while (queue->size() > (juint)max_size) { 130 while (queue->size() > (juint)max_size) {
127 oop obj_to_scan; 131 oop obj_to_scan;
128 if (queue->pop_local(obj_to_scan)) { 132 if (queue->pop_local(obj_to_scan)) {
129 note_pop();
130 if ((HeapWord *)obj_to_scan < young_old_boundary()) { 133 if ((HeapWord *)obj_to_scan < young_old_boundary()) {
131 if (obj_to_scan->is_objArray() && 134 if (obj_to_scan->is_objArray() &&
132 obj_to_scan->is_forwarded() && 135 obj_to_scan->is_forwarded() &&
133 obj_to_scan->forwardee() != obj_to_scan) { 136 obj_to_scan->forwardee() != obj_to_scan) {
134 scan_partial_array_and_push_remainder(obj_to_scan); 137 scan_partial_array_and_push_remainder(obj_to_scan);
269 Generation& old_gen, 272 Generation& old_gen,
270 ObjToScanQueueSet& queue_set, 273 ObjToScanQueueSet& queue_set,
271 GrowableArray<oop>** overflow_stacks_, 274 GrowableArray<oop>** overflow_stacks_,
272 size_t desired_plab_sz, 275 size_t desired_plab_sz,
273 ParallelTaskTerminator& term); 276 ParallelTaskTerminator& term);
277
278 ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }
279
274 inline ParScanThreadState& thread_state(int i); 280 inline ParScanThreadState& thread_state(int i);
275 int pushes() { return _pushes; } 281
276 int pops() { return _pops; }
277 int steals() { return _steals; }
278 void reset(bool promotion_failed); 282 void reset(bool promotion_failed);
279 void flush(); 283 void flush();
284
285 #if TASKQUEUE_STATS
286 static void
287 print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
288 void print_termination_stats(outputStream* const st = gclog_or_tty);
289 static void
290 print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
291 void print_taskqueue_stats(outputStream* const st = gclog_or_tty);
292 void reset_stats();
293 #endif // TASKQUEUE_STATS
294
280 private: 295 private:
281 ParallelTaskTerminator& _term; 296 ParallelTaskTerminator& _term;
282 ParNewGeneration& _gen; 297 ParNewGeneration& _gen;
283 Generation& _next_gen; 298 Generation& _next_gen;
284 // staticstics
285 int _pushes;
286 int _pops;
287 int _steals;
288 }; 299 };
289 300
290 301
291 ParScanThreadStateSet::ParScanThreadStateSet( 302 ParScanThreadStateSet::ParScanThreadStateSet(
292 int num_threads, Space& to_space, ParNewGeneration& gen, 303 int num_threads, Space& to_space, ParNewGeneration& gen,
293 Generation& old_gen, ObjToScanQueueSet& queue_set, 304 Generation& old_gen, ObjToScanQueueSet& queue_set,
294 GrowableArray<oop>** overflow_stack_set_, 305 GrowableArray<oop>** overflow_stack_set_,
295 size_t desired_plab_sz, ParallelTaskTerminator& term) 306 size_t desired_plab_sz, ParallelTaskTerminator& term)
296 : ResourceArray(sizeof(ParScanThreadState), num_threads), 307 : ResourceArray(sizeof(ParScanThreadState), num_threads),
297 _gen(gen), _next_gen(old_gen), _term(term), 308 _gen(gen), _next_gen(old_gen), _term(term)
298 _pushes(0), _pops(0), _steals(0)
299 { 309 {
300 assert(num_threads > 0, "sanity check!"); 310 assert(num_threads > 0, "sanity check!");
301 // Initialize states. 311 // Initialize states.
302 for (int i = 0; i < num_threads; ++i) { 312 for (int i = 0; i < num_threads; ++i) {
303 new ((ParScanThreadState*)_data + i) 313 new ((ParScanThreadState*)_data + i)
321 thread_state(i).print_and_clear_promotion_failure_size(); 331 thread_state(i).print_and_clear_promotion_failure_size();
322 } 332 }
323 } 333 }
324 } 334 }
325 335
336 #if TASKQUEUE_STATS
337 void
338 ParScanThreadState::reset_stats()
339 {
340 taskqueue_stats().reset();
341 _term_attempts = 0;
342 _overflow_refills = 0;
343 _overflow_refill_objs = 0;
344 }
345
346 void ParScanThreadStateSet::reset_stats()
347 {
348 for (int i = 0; i < length(); ++i) {
349 thread_state(i).reset_stats();
350 }
351 }
352
353 void
354 ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st)
355 {
356 st->print_raw_cr("GC Termination Stats");
357 st->print_raw_cr(" elapsed --strong roots-- "
358 "-------termination-------");
359 st->print_raw_cr("thr ms ms % "
360 " ms % attempts");
361 st->print_raw_cr("--- --------- --------- ------ "
362 "--------- ------ --------");
363 }
364
365 void ParScanThreadStateSet::print_termination_stats(outputStream* const st)
366 {
367 print_termination_stats_hdr(st);
368
369 for (int i = 0; i < length(); ++i) {
370 const ParScanThreadState & pss = thread_state(i);
371 const double elapsed_ms = pss.elapsed_time() * 1000.0;
372 const double s_roots_ms = pss.strong_roots_time() * 1000.0;
373 const double term_ms = pss.term_time() * 1000.0;
374 st->print_cr("%3d %9.2f %9.2f %6.2f "
375 "%9.2f %6.2f " SIZE_FORMAT_W(8),
376 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
377 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts());
378 }
379 }
380
381 // Print stats related to work queue activity.
382 void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st)
383 {
384 st->print_raw_cr("GC Task Stats");
385 st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
386 st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
387 }
388
389 void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st)
390 {
391 print_taskqueue_stats_hdr(st);
392
393 TaskQueueStats totals;
394 for (int i = 0; i < length(); ++i) {
395 const ParScanThreadState & pss = thread_state(i);
396 const TaskQueueStats & stats = pss.taskqueue_stats();
397 st->print("%3d ", i); stats.print(st); st->cr();
398 totals += stats;
399
400 if (pss.overflow_refills() > 0) {
401 st->print_cr(" " SIZE_FORMAT_W(10) " overflow refills "
402 SIZE_FORMAT_W(10) " overflow objects",
403 pss.overflow_refills(), pss.overflow_refill_objs());
404 }
405 }
406 st->print("tot "); totals.print(st); st->cr();
407
408 DEBUG_ONLY(totals.verify());
409 }
410 #endif // TASKQUEUE_STATS
411
326 void ParScanThreadStateSet::flush() 412 void ParScanThreadStateSet::flush()
327 { 413 {
328 // Work in this loop should be kept as lightweight as 414 // Work in this loop should be kept as lightweight as
329 // possible since this might otherwise become a bottleneck 415 // possible since this might otherwise become a bottleneck
330 // to scaling. Should we add heavy-weight work into this 416 // to scaling. Should we add heavy-weight work into this
344 _gen.age_table()->merge(local_table); 430 _gen.age_table()->merge(local_table);
345 431
346 // Inform old gen that we're done. 432 // Inform old gen that we're done.
347 _next_gen.par_promote_alloc_done(i); 433 _next_gen.par_promote_alloc_done(i);
348 _next_gen.par_oop_since_save_marks_iterate_done(i); 434 _next_gen.par_oop_since_save_marks_iterate_done(i);
349 435 }
350 // Flush stats related to work queue activity (push/pop/steal) 436
351 // This could conceivably become a bottleneck; if so, we'll put the
352 // stat's gathering under the flag.
353 if (PAR_STATS_ENABLED) {
354 _pushes += par_scan_state.pushes();
355 _pops += par_scan_state.pops();
356 _steals += par_scan_state.steals();
357 if (ParallelGCVerbose) {
358 gclog_or_tty->print("Thread %d complete:\n"
359 " Pushes: %7d Pops: %7d Steals %7d (in %d attempts)\n",
360 i, par_scan_state.pushes(), par_scan_state.pops(),
361 par_scan_state.steals(), par_scan_state.steal_attempts());
362 if (par_scan_state.overflow_pushes() > 0 ||
363 par_scan_state.overflow_refills() > 0) {
364 gclog_or_tty->print(" Overflow pushes: %7d "
365 "Overflow refills: %7d for %d objs.\n",
366 par_scan_state.overflow_pushes(),
367 par_scan_state.overflow_refills(),
368 par_scan_state.overflow_refill_objs());
369 }
370
371 double elapsed = par_scan_state.elapsed();
372 double strong_roots = par_scan_state.strong_roots_time();
373 double term = par_scan_state.term_time();
374 gclog_or_tty->print(
375 " Elapsed: %7.2f ms.\n"
376 " Strong roots: %7.2f ms (%6.2f%%)\n"
377 " Termination: %7.2f ms (%6.2f%%) (in %d entries)\n",
378 elapsed * 1000.0,
379 strong_roots * 1000.0, (strong_roots*100.0/elapsed),
380 term * 1000.0, (term*100.0/elapsed),
381 par_scan_state.term_attempts());
382 }
383 }
384 }
385 if (UseConcMarkSweepGC && ParallelGCThreads > 0) { 437 if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
386 // We need to call this even when ResizeOldPLAB is disabled 438 // We need to call this even when ResizeOldPLAB is disabled
387 // so as to avoid breaking some asserts. While we may be able 439 // so as to avoid breaking some asserts. While we may be able
388 // to avoid this by reorganizing the code a bit, I am loathe 440 // to avoid this by reorganizing the code a bit, I am loathe
389 // to do that unless we find cases where ergo leads to bad 441 // to do that unless we find cases where ergo leads to bad
454 par_scan_state()->trim_queues(0); 506 par_scan_state()->trim_queues(0);
455 507
456 // We have no local work, attempt to steal from other threads. 508 // We have no local work, attempt to steal from other threads.
457 509
458 // attempt to steal work from promoted. 510 // attempt to steal work from promoted.
459 par_scan_state()->note_steal_attempt();
460 if (task_queues()->steal(par_scan_state()->thread_num(), 511 if (task_queues()->steal(par_scan_state()->thread_num(),
461 par_scan_state()->hash_seed(), 512 par_scan_state()->hash_seed(),
462 obj_to_scan)) { 513 obj_to_scan)) {
463 par_scan_state()->note_steal();
464 bool res = work_q->push(obj_to_scan); 514 bool res = work_q->push(obj_to_scan);
465 assert(res, "Empty queue should have room for a push."); 515 assert(res, "Empty queue should have room for a push.");
466 516
467 par_scan_state()->note_push();
468 // if successful, goto Start. 517 // if successful, goto Start.
469 continue; 518 continue;
470 519
471 // try global overflow list. 520 // try global overflow list.
472 } else if (par_gen()->take_from_overflow_list(par_scan_state())) { 521 } else if (par_gen()->take_from_overflow_list(par_scan_state())) {
839 } else { 888 } else {
840 GenCollectedHeap::StrongRootsScope srs(gch); 889 GenCollectedHeap::StrongRootsScope srs(gch);
841 tsk.work(0); 890 tsk.work(0);
842 } 891 }
843 thread_state_set.reset(promotion_failed()); 892 thread_state_set.reset(promotion_failed());
844
845 if (PAR_STATS_ENABLED && ParallelGCVerbose) {
846 gclog_or_tty->print("Thread totals:\n"
847 " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n",
848 thread_state_set.pushes(), thread_state_set.pops(),
849 thread_state_set.steals(),
850 thread_state_set.pops()+thread_state_set.steals());
851 }
852 assert(thread_state_set.pushes() == thread_state_set.pops()
853 + thread_state_set.steals(),
854 "Or else the queues are leaky.");
855 893
856 // Process (weak) reference objects found during scavenge. 894 // Process (weak) reference objects found during scavenge.
857 ReferenceProcessor* rp = ref_processor(); 895 ReferenceProcessor* rp = ref_processor();
858 IsAliveClosure is_alive(this); 896 IsAliveClosure is_alive(this);
859 ScanWeakRefClosure scan_weak_ref(this); 897 ScanWeakRefClosure scan_weak_ref(this);
930 968
931 if (PrintGC && !PrintGCDetails) { 969 if (PrintGC && !PrintGCDetails) {
932 gch->print_heap_change(gch_prev_used); 970 gch->print_heap_change(gch_prev_used);
933 } 971 }
934 972
973 TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
974 TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
975
935 if (UseAdaptiveSizePolicy) { 976 if (UseAdaptiveSizePolicy) {
936 size_policy->minor_collection_end(gch->gc_cause()); 977 size_policy->minor_collection_end(gch->gc_cause());
937 size_policy->avg_survived()->sample(from()->used()); 978 size_policy->avg_survived()->sample(from()->used());
938 } 979 }
939 980
1102 // Add stats for overflow pushes. 1143 // Add stats for overflow pushes.
1103 if (Verbose && PrintGCDetails) { 1144 if (Verbose && PrintGCDetails) {
1104 gclog_or_tty->print("queue overflow!\n"); 1145 gclog_or_tty->print("queue overflow!\n");
1105 } 1146 }
1106 push_on_overflow_list(old, par_scan_state); 1147 push_on_overflow_list(old, par_scan_state);
1107 par_scan_state->note_overflow_push(); 1148 TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
1108 } 1149 }
1109 par_scan_state->note_push();
1110 1150
1111 return new_obj; 1151 return new_obj;
1112 } 1152 }
1113 1153
1114 // Oops. Someone beat us to it. Undo the allocation. Where did we 1154 // Oops. Someone beat us to it. Undo the allocation. Where did we
1225 } 1265 }
1226 ) 1266 )
1227 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { 1267 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
1228 // Add stats for overflow pushes. 1268 // Add stats for overflow pushes.
1229 push_on_overflow_list(old, par_scan_state); 1269 push_on_overflow_list(old, par_scan_state);
1230 par_scan_state->note_overflow_push(); 1270 TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
1231 } 1271 }
1232 par_scan_state->note_push();
1233 1272
1234 return new_obj; 1273 return new_obj;
1235 } 1274 }
1236 1275
1237 // Oops. Someone beat us to it. Undo the allocation. Where did we 1276 // Oops. Someone beat us to it. Undo the allocation. Where did we
1464 bool ok = work_q->push(obj_to_push); 1503 bool ok = work_q->push(obj_to_push);
1465 assert(ok, "Should have succeeded"); 1504 assert(ok, "Should have succeeded");
1466 cur = next; 1505 cur = next;
1467 n++; 1506 n++;
1468 } 1507 }
1469 par_scan_state->note_overflow_refill(n); 1508 TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n));
1470 #ifndef PRODUCT 1509 #ifndef PRODUCT
1471 assert(_num_par_pushes >= n, "Too many pops?"); 1510 assert(_num_par_pushes >= n, "Too many pops?");
1472 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes); 1511 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
1473 #endif 1512 #endif
1474 return true; 1513 return true;