comparison src/share/vm/memory/sharedHeap.hpp @ 4095:bca17e38de00

6593758: RFE: Enhance GC ergonomics to dynamically choose ParallelGCThreads Summary: Select number of GC threads dynamically based on heap usage and number of Java threads Reviewed-by: johnc, ysr, jcoomes
author jmasa
date Tue, 09 Aug 2011 10:16:01 -0700
parents 1f4413413144
children 441e946dc1af
comparison
equal deleted inserted replaced
4094:3a298e04d914 4095:bca17e38de00
47 class WorkGang; 47 class WorkGang;
48 class FlexibleWorkGang; 48 class FlexibleWorkGang;
49 class CollectorPolicy; 49 class CollectorPolicy;
50 class KlassHandle; 50 class KlassHandle;
51 51
52 // Note on use of FlexibleWorkGang's for GC.
53 // There are three places where task completion is determined.
54 // In
55 // 1) ParallelTaskTerminator::offer_termination() where _n_threads
56 // must be set to the correct value so that count of workers that
57 // have offered termination will exactly match the number
58 // working on the task. Tasks such as those derived from GCTask
59 // use ParallelTaskTerminator's. Tasks that want load balancing
60 // by work stealing use this method to gauge completion.
61 // 2) SubTasksDone has a variable _n_threads that is used in
62 // all_tasks_completed() to determine completion. all_tasks_completed()
63 // counts the number of tasks that have been done and then resets
64 // the SubTasksDone so that it can be used again. When the number of
65 // tasks is set to the number of GC workers, then _n_threads must
66 // be set to the number of active GC workers. G1CollectedHeap,
67 // HRInto_G1RemSet, GenCollectedHeap and SharedHeap have SubTasksDone.
68 // This seems too many.
69 // 3) SequentialSubTasksDone has an _n_threads that is used in
70 // a way similar to SubTasksDone and has the same dependency on the
71 // number of active GC workers. CompactibleFreeListSpace and Space
72 // have SequentialSubTasksDone's.
73 // Example of using SubTasksDone and SequentialSubTasksDone
74 // G1CollectedHeap::g1_process_strong_roots() calls
75 // process_strong_roots(false, // no scoping; this is parallel code
76 // collecting_perm_gen, so,
77 // &buf_scan_non_heap_roots,
78 // &eager_scan_code_roots,
79 // &buf_scan_perm);
80 // which delegates to SharedHeap::process_strong_roots() and uses
81 // SubTasksDone* _process_strong_tasks to claim tasks.
82 // process_strong_roots() calls
83 // rem_set()->younger_refs_iterate(perm_gen(), perm_blk);
84 // to scan the card table and which eventually calls down into
85 // CardTableModRefBS::par_non_clean_card_iterate_work(). This method
86 // uses SequentialSubTasksDone* _pst to claim tasks.
87 // Both SubTasksDone and SequentialSubTasksDone call their method
88 // all_tasks_completed() to count the number of GC workers that have
89 // finished their work. That logic is "when all the workers are
90 // finished the tasks are finished".
91 //
92 // The pattern that appears in the code is to set _n_threads
93 // to a value > 1 before a task that you would like executed in parallel
94 // and then to set it to 0 after that task has completed. A value of
95 // 0 is a "special" value in set_n_threads() which translates to
96 // setting _n_threads to 1.
97 //
98 // Some code uses _n_termination to decide if work should be done in
99 // parallel. The notorious possibly_parallel_oops_do() in threads.cpp
100 // is an example of such code. Look for variable "is_par" for other
101 // examples.
102 //
103 // The active_workers is not reset to 0 after a parallel phase. Its
104 // value may be used in later phases and in one instance at least
105 // (the parallel remark) it has to be used (the parallel remark depends
106 // on the partitioning done in the previous parallel scavenge).
107
52 class SharedHeap : public CollectedHeap { 108 class SharedHeap : public CollectedHeap {
53 friend class VMStructs; 109 friend class VMStructs;
54 110
55 friend class VM_GC_Operation; 111 friend class VM_GC_Operation;
56 friend class VM_CGC_Operation; 112 friend class VM_CGC_Operation;
81 // for this variable. 137 // for this variable.
82 int _strong_roots_parity; 138 int _strong_roots_parity;
83 139
84 // If we're doing parallel GC, use this gang of threads. 140 // If we're doing parallel GC, use this gang of threads.
85 FlexibleWorkGang* _workers; 141 FlexibleWorkGang* _workers;
86
87 // Number of parallel threads currently working on GC tasks.
88 // 0 indicates use sequential code; 1 means use parallel code even with
89 // only one thread, for performance testing purposes.
90 int _n_par_threads;
91 142
92 // Full initialization is done in a concrete subtype's "initialize" 143 // Full initialization is done in a concrete subtype's "initialize"
93 // function. 144 // function.
94 SharedHeap(CollectorPolicy* policy_); 145 SharedHeap(CollectorPolicy* policy_);
95 146
105 static SharedHeap* heap() { return _sh; } 156 static SharedHeap* heap() { return _sh; }
106 157
107 CollectorPolicy *collector_policy() const { return _collector_policy; } 158 CollectorPolicy *collector_policy() const { return _collector_policy; }
108 159
109 void set_barrier_set(BarrierSet* bs); 160 void set_barrier_set(BarrierSet* bs);
161 SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
110 162
111 // Does operations required after initialization has been done. 163 // Does operations required after initialization has been done.
112 virtual void post_initialize(); 164 virtual void post_initialize();
113 165
114 // Initialization of ("weak") reference processing support 166 // Initialization of ("weak") reference processing support
195 SO_Strings = 0x4, 247 SO_Strings = 0x4,
196 SO_CodeCache = 0x8 248 SO_CodeCache = 0x8
197 }; 249 };
198 250
199 FlexibleWorkGang* workers() const { return _workers; } 251 FlexibleWorkGang* workers() const { return _workers; }
200
201 // Sets the number of parallel threads that will be doing tasks
202 // (such as process strong roots) subsequently.
203 virtual void set_par_threads(int t);
204
205 // Number of threads currently working on GC tasks.
206 int n_par_threads() { return _n_par_threads; }
207 252
208 // Invoke the "do_oop" method the closure "roots" on all root locations. 253 // Invoke the "do_oop" method the closure "roots" on all root locations.
209 // If "collecting_perm_gen" is false, then roots that may only contain 254 // If "collecting_perm_gen" is false, then roots that may only contain
210 // references to permGen objects are not scanned; instead, in that case, 255 // references to permGen objects are not scanned; instead, in that case,
211 // the "perm_blk" closure is applied to all outgoing refs in the 256 // the "perm_blk" closure is applied to all outgoing refs in the
238 283
239 // Do anything common to GC's. 284 // Do anything common to GC's.
240 virtual void gc_prologue(bool full) = 0; 285 virtual void gc_prologue(bool full) = 0;
241 virtual void gc_epilogue(bool full) = 0; 286 virtual void gc_epilogue(bool full) = 0;
242 287
288 // Sets the number of parallel threads that will be doing tasks
289 // (such as process strong roots) subsequently.
290 virtual void set_par_threads(int t);
291
292 int n_termination();
293 void set_n_termination(int t);
294
243 // 295 //
244 // New methods from CollectedHeap 296 // New methods from CollectedHeap
245 // 297 //
246 298
247 size_t permanent_capacity() const { 299 size_t permanent_capacity() const {