comparison src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @ 1145:e018e6884bd8

6631166: CMS: better heuristics when combatting fragmentation. Summary: Autonomic per-worker free block cache sizing, tunable coalescing policies, fixes to per-size block statistics, retuned gain and bandwidth of some feedback loop filters to allow quicker reactivity to abrupt changes in ambient demand, and other heuristics to reduce fragmentation of the CMS old gen. Also tightened some assertions, including those related to locking. Reviewed-by: jmasa
author ysr
date Wed, 23 Dec 2009 09:23:54 -0800
parents 148e5441d916
children 0bfd3fb24150
comparison
equal deleted inserted replaced
1111:44f61c24ddab 1145:e018e6884bd8
48 _evacuate_followers(this, &_to_space_closure, &_old_gen_closure, 48 _evacuate_followers(this, &_to_space_closure, &_old_gen_closure,
49 &_to_space_root_closure, gen_, &_old_gen_root_closure, 49 &_to_space_root_closure, gen_, &_old_gen_root_closure,
50 work_queue_set_, &term_), 50 work_queue_set_, &term_),
51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this), 51 _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
52 _keep_alive_closure(&_scan_weak_ref_closure), 52 _keep_alive_closure(&_scan_weak_ref_closure),
53 _promotion_failure_size(0),
53 _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0), 54 _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
54 _strong_roots_time(0.0), _term_time(0.0) 55 _strong_roots_time(0.0), _term_time(0.0)
55 { 56 {
56 _survivor_chunk_array = 57 _survivor_chunk_array =
57 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); 58 (ChunkArray*) old_gen()->get_data_recorder(thread_num());
247 } else { 248 } else {
248 CollectedHeap::fill_with_object(obj, word_sz); 249 CollectedHeap::fill_with_object(obj, word_sz);
249 } 250 }
250 } 251 }
251 252
253 void ParScanThreadState::print_and_clear_promotion_failure_size() {
254 if (_promotion_failure_size != 0) {
255 if (PrintPromotionFailure) {
256 gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ",
257 _thread_num, _promotion_failure_size);
258 }
259 _promotion_failure_size = 0;
260 }
261 }
262
252 class ParScanThreadStateSet: private ResourceArray { 263 class ParScanThreadStateSet: private ResourceArray {
253 public: 264 public:
254 // Initializes states for the specified number of threads; 265 // Initializes states for the specified number of threads;
255 ParScanThreadStateSet(int num_threads, 266 ParScanThreadStateSet(int num_threads,
256 Space& to_space, 267 Space& to_space,
258 Generation& old_gen, 269 Generation& old_gen,
259 ObjToScanQueueSet& queue_set, 270 ObjToScanQueueSet& queue_set,
260 GrowableArray<oop>** overflow_stacks_, 271 GrowableArray<oop>** overflow_stacks_,
261 size_t desired_plab_sz, 272 size_t desired_plab_sz,
262 ParallelTaskTerminator& term); 273 ParallelTaskTerminator& term);
263 inline ParScanThreadState& thread_sate(int i); 274 inline ParScanThreadState& thread_state(int i);
264 int pushes() { return _pushes; } 275 int pushes() { return _pushes; }
265 int pops() { return _pops; } 276 int pops() { return _pops; }
266 int steals() { return _steals; } 277 int steals() { return _steals; }
267 void reset(); 278 void reset(bool promotion_failed);
268 void flush(); 279 void flush();
269 private: 280 private:
270 ParallelTaskTerminator& _term; 281 ParallelTaskTerminator& _term;
271 ParNewGeneration& _gen; 282 ParNewGeneration& _gen;
272 Generation& _next_gen; 283 Generation& _next_gen;
293 ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set, 304 ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set,
294 overflow_stack_set_, desired_plab_sz, term); 305 overflow_stack_set_, desired_plab_sz, term);
295 } 306 }
296 } 307 }
297 308
298 inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i) 309 inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i)
299 { 310 {
300 assert(i >= 0 && i < length(), "sanity check!"); 311 assert(i >= 0 && i < length(), "sanity check!");
301 return ((ParScanThreadState*)_data)[i]; 312 return ((ParScanThreadState*)_data)[i];
302 } 313 }
303 314
304 315
305 void ParScanThreadStateSet::reset() 316 void ParScanThreadStateSet::reset(bool promotion_failed)
306 { 317 {
307 _term.reset_for_reuse(); 318 _term.reset_for_reuse();
319 if (promotion_failed) {
320 for (int i = 0; i < length(); ++i) {
321 thread_state(i).print_and_clear_promotion_failure_size();
322 }
323 }
308 } 324 }
309 325
310 void ParScanThreadStateSet::flush() 326 void ParScanThreadStateSet::flush()
311 { 327 {
328 // Work in this loop should be kept as lightweight as
329 // possible since this might otherwise become a bottleneck
330 // to scaling. Should we add heavy-weight work into this
331 // loop, consider parallelizing the loop into the worker threads.
312 for (int i = 0; i < length(); ++i) { 332 for (int i = 0; i < length(); ++i) {
313 ParScanThreadState& par_scan_state = thread_sate(i); 333 ParScanThreadState& par_scan_state = thread_state(i);
314 334
315 // Flush stats related to To-space PLAB activity and 335 // Flush stats related to To-space PLAB activity and
316 // retire the last buffer. 336 // retire the last buffer.
317 par_scan_state.to_space_alloc_buffer()-> 337 par_scan_state.to_space_alloc_buffer()->
318 flush_stats_and_retire(_gen.plab_stats(), 338 flush_stats_and_retire(_gen.plab_stats(),
360 term * 1000.0, (term*100.0/elapsed), 380 term * 1000.0, (term*100.0/elapsed),
361 par_scan_state.term_attempts()); 381 par_scan_state.term_attempts());
362 } 382 }
363 } 383 }
364 } 384 }
385 if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
386 // We need to call this even when ResizeOldPLAB is disabled
387 // so as to avoid breaking some asserts. While we may be able
388 // to avoid this by reorganizing the code a bit, I am loath
389 // to do that unless we find cases where ergo leads to bad
390 // performance.
391 CFLS_LAB::compute_desired_plab_size();
392 }
365 } 393 }
366 394
367 ParScanClosure::ParScanClosure(ParNewGeneration* g, 395 ParScanClosure::ParScanClosure(ParNewGeneration* g,
368 ParScanThreadState* par_scan_state) : 396 ParScanThreadState* par_scan_state) :
369 OopsInGenClosure(g), _par_scan_state(par_scan_state), _g(g) 397 OopsInGenClosure(g), _par_scan_state(par_scan_state), _g(g)
473 // We would need multiple old-gen queues otherwise. 501 // We would need multiple old-gen queues otherwise.
474 assert(gch->n_gens() == 2, "Par young collection currently only works with one older gen."); 502 assert(gch->n_gens() == 2, "Par young collection currently only works with one older gen.");
475 503
476 Generation* old_gen = gch->next_gen(_gen); 504 Generation* old_gen = gch->next_gen(_gen);
477 505
478 ParScanThreadState& par_scan_state = _state_set->thread_sate(i); 506 ParScanThreadState& par_scan_state = _state_set->thread_state(i);
479 par_scan_state.set_young_old_boundary(_young_old_boundary); 507 par_scan_state.set_young_old_boundary(_young_old_boundary);
480 508
481 par_scan_state.start_strong_roots(); 509 par_scan_state.start_strong_roots();
482 gch->gen_process_strong_roots(_gen->level(), 510 gch->gen_process_strong_roots(_gen->level(),
483 true, // Process younger gens, if any, 511 true, // Process younger gens, if any,
657 685
658 void ParNewRefProcTaskProxy::work(int i) 686 void ParNewRefProcTaskProxy::work(int i)
659 { 687 {
660 ResourceMark rm; 688 ResourceMark rm;
661 HandleMark hm; 689 HandleMark hm;
662 ParScanThreadState& par_scan_state = _state_set.thread_sate(i); 690 ParScanThreadState& par_scan_state = _state_set.thread_state(i);
663 par_scan_state.set_young_old_boundary(_young_old_boundary); 691 par_scan_state.set_young_old_boundary(_young_old_boundary);
664 _task.work(i, par_scan_state.is_alive_closure(), 692 _task.work(i, par_scan_state.is_alive_closure(),
665 par_scan_state.keep_alive_closure(), 693 par_scan_state.keep_alive_closure(),
666 par_scan_state.evacuate_followers_closure()); 694 par_scan_state.evacuate_followers_closure());
667 } 695 }
691 WorkGang* workers = gch->workers(); 719 WorkGang* workers = gch->workers();
692 assert(workers != NULL, "Need parallel worker threads."); 720 assert(workers != NULL, "Need parallel worker threads.");
693 ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(), 721 ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
694 _generation.reserved().end(), _state_set); 722 _generation.reserved().end(), _state_set);
695 workers->run_task(&rp_task); 723 workers->run_task(&rp_task);
696 _state_set.reset(); 724 _state_set.reset(_generation.promotion_failed());
697 } 725 }
698 726
699 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) 727 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
700 { 728 {
701 GenCollectedHeap* gch = GenCollectedHeap::heap(); 729 GenCollectedHeap* gch = GenCollectedHeap::heap();
811 workers->run_task(&tsk); 839 workers->run_task(&tsk);
812 } else { 840 } else {
813 GenCollectedHeap::StrongRootsScope srs(gch); 841 GenCollectedHeap::StrongRootsScope srs(gch);
814 tsk.work(0); 842 tsk.work(0);
815 } 843 }
816 thread_state_set.reset(); 844 thread_state_set.reset(promotion_failed());
817 845
818 if (PAR_STATS_ENABLED && ParallelGCVerbose) { 846 if (PAR_STATS_ENABLED && ParallelGCVerbose) {
819 gclog_or_tty->print("Thread totals:\n" 847 gclog_or_tty->print("Thread totals:\n"
820 " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n", 848 " Pushes: %7d Pops: %7d Steals %7d (sum = %7d).\n",
821 thread_state_set.pushes(), thread_state_set.pops(), 849 thread_state_set.pushes(), thread_state_set.pops(),
880 } 908 }
881 // All the spaces are in play for mark-sweep. 909 // All the spaces are in play for mark-sweep.
882 swap_spaces(); // Make life simpler for CMS || rescan; see 6483690. 910 swap_spaces(); // Make life simpler for CMS || rescan; see 6483690.
883 from()->set_next_compaction_space(to()); 911 from()->set_next_compaction_space(to());
884 gch->set_incremental_collection_will_fail(); 912 gch->set_incremental_collection_will_fail();
913 // Inform the next generation that a promotion failure occurred.
914 _next_gen->promotion_failure_occurred();
885 915
886 // Reset the PromotionFailureALot counters. 916 // Reset the PromotionFailureALot counters.
887 NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();) 917 NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
888 } 918 }
889 // set new iteration safe limit for the survivor spaces 919 // set new iteration safe limit for the survivor spaces
1027 // promotion failed, forward to self 1057 // promotion failed, forward to self
1028 _promotion_failed = true; 1058 _promotion_failed = true;
1029 new_obj = old; 1059 new_obj = old;
1030 1060
1031 preserve_mark_if_necessary(old, m); 1061 preserve_mark_if_necessary(old, m);
1062 // Log the size of the maiden promotion failure
1063 par_scan_state->log_promotion_failure(sz);
1032 } 1064 }
1033 1065
1034 old->forward_to(new_obj); 1066 old->forward_to(new_obj);
1035 forward_ptr = NULL; 1067 forward_ptr = NULL;
1036 } else { 1068 } else {
1148 1180
1149 _promotion_failed = true; 1181 _promotion_failed = true;
1150 failed_to_promote = true; 1182 failed_to_promote = true;
1151 1183
1152 preserve_mark_if_necessary(old, m); 1184 preserve_mark_if_necessary(old, m);
1185 // Log the size of the maiden promotion failure
1186 par_scan_state->log_promotion_failure(sz);
1153 } 1187 }
1154 } else { 1188 } else {
1155 // Is in to-space; do copying ourselves. 1189 // Is in to-space; do copying ourselves.
1156 Copy::aligned_disjoint_words((HeapWord*)old, (HeapWord*)new_obj, sz); 1190 Copy::aligned_disjoint_words((HeapWord*)old, (HeapWord*)new_obj, sz);
1157 // Restore the mark word copied above. 1191 // Restore the mark word copied above.