Mercurial > hg > truffle
annotate src/share/vm/utilities/taskqueue.hpp @ 14649:f6301b007a16
6498581: ThreadInterruptTest3 produces wrong output on Windows
Summary: There is race condition between os::interrupt and os::is_interrupted on Windows. In JVM_Sleep(Thread.sleep), check if thread gets interrupted, it may see interrupted but not really interrupted so cause spurious waking up (early return from sleep). Fix by checking if interrupt event really gets set thus prevent false return. For intrinsic of _isInterrupted, on Windows, go fastpath only on bit not set.
Reviewed-by: acorn, kvn
Contributed-by: david.holmes@oracle.com, yumin.qi@oracle.com
author | minqi |
---|---|
date | Wed, 26 Feb 2014 15:20:41 -0800 |
parents | 2b8e28fdf503 |
children | 4ca6dc0799b6 ce8f6bb717c9 |
rev | line source |
---|---|
0 | 1 /* |
12087
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1311
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1311
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1311
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #ifndef SHARE_VM_UTILITIES_TASKQUEUE_HPP |
26 #define SHARE_VM_UTILITIES_TASKQUEUE_HPP | |
27 | |
28 #include "memory/allocation.hpp" | |
29 #include "memory/allocation.inline.hpp" | |
30 #include "runtime/mutex.hpp" | |
31 #include "utilities/stack.hpp" | |
32 #ifdef TARGET_OS_ARCH_linux_x86 | |
33 # include "orderAccess_linux_x86.inline.hpp" | |
34 #endif | |
35 #ifdef TARGET_OS_ARCH_linux_sparc | |
36 # include "orderAccess_linux_sparc.inline.hpp" | |
37 #endif | |
38 #ifdef TARGET_OS_ARCH_linux_zero | |
39 # include "orderAccess_linux_zero.inline.hpp" | |
40 #endif | |
41 #ifdef TARGET_OS_ARCH_solaris_x86 | |
42 # include "orderAccess_solaris_x86.inline.hpp" | |
43 #endif | |
44 #ifdef TARGET_OS_ARCH_solaris_sparc | |
45 # include "orderAccess_solaris_sparc.inline.hpp" | |
46 #endif | |
47 #ifdef TARGET_OS_ARCH_windows_x86 | |
48 # include "orderAccess_windows_x86.inline.hpp" | |
49 #endif | |
2192
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
1972
diff
changeset
|
50 #ifdef TARGET_OS_ARCH_linux_arm |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
1972
diff
changeset
|
51 # include "orderAccess_linux_arm.inline.hpp" |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
1972
diff
changeset
|
52 #endif |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
1972
diff
changeset
|
53 #ifdef TARGET_OS_ARCH_linux_ppc |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
1972
diff
changeset
|
54 # include "orderAccess_linux_ppc.inline.hpp" |
b92c45f2bc75
7016023: Enable building ARM and PPC from src/closed repository
bobv
parents:
1972
diff
changeset
|
55 #endif |
14411 | 56 #ifdef TARGET_OS_ARCH_aix_ppc |
57 # include "orderAccess_aix_ppc.inline.hpp" | |
58 #endif | |
3960 | 59 #ifdef TARGET_OS_ARCH_bsd_x86 |
60 # include "orderAccess_bsd_x86.inline.hpp" | |
61 #endif | |
62 #ifdef TARGET_OS_ARCH_bsd_zero | |
63 # include "orderAccess_bsd_zero.inline.hpp" | |
64 #endif | |
1972 | 65 |
1665 | 66 // Simple TaskQueue stats that are collected by default in debug builds. |
67 | |
68 #if !defined(TASKQUEUE_STATS) && defined(ASSERT) | |
69 #define TASKQUEUE_STATS 1 | |
70 #elif !defined(TASKQUEUE_STATS) | |
71 #define TASKQUEUE_STATS 0 | |
72 #endif | |
73 | |
74 #if TASKQUEUE_STATS | |
75 #define TASKQUEUE_STATS_ONLY(code) code | |
76 #else | |
77 #define TASKQUEUE_STATS_ONLY(code) | |
78 #endif // TASKQUEUE_STATS | |
79 | |
80 #if TASKQUEUE_STATS | |
81 class TaskQueueStats { | |
82 public: | |
83 enum StatId { | |
84 push, // number of taskqueue pushes | |
85 pop, // number of taskqueue pops | |
86 pop_slow, // subset of taskqueue pops that were done slow-path | |
87 steal_attempt, // number of taskqueue steal attempts | |
88 steal, // number of taskqueue steals | |
89 overflow, // number of overflow pushes | |
90 overflow_max_len, // max length of overflow stack | |
91 last_stat_id | |
92 }; | |
93 | |
94 public: | |
95 inline TaskQueueStats() { reset(); } | |
96 | |
97 inline void record_push() { ++_stats[push]; } | |
98 inline void record_pop() { ++_stats[pop]; } | |
99 inline void record_pop_slow() { record_pop(); ++_stats[pop_slow]; } | |
100 inline void record_steal(bool success); | |
101 inline void record_overflow(size_t new_length); | |
102 | |
1709 | 103 TaskQueueStats & operator +=(const TaskQueueStats & addend); |
104 | |
1665 | 105 inline size_t get(StatId id) const { return _stats[id]; } |
106 inline const size_t* get() const { return _stats; } | |
107 | |
108 inline void reset(); | |
109 | |
1709 | 110 // Print the specified line of the header (does not include a line separator). |
1665 | 111 static void print_header(unsigned int line, outputStream* const stream = tty, |
112 unsigned int width = 10); | |
1709 | 113 // Print the statistics (does not include a line separator). |
1665 | 114 void print(outputStream* const stream = tty, unsigned int width = 10) const; |
115 | |
1709 | 116 DEBUG_ONLY(void verify() const;) |
117 | |
1665 | 118 private: |
119 size_t _stats[last_stat_id]; | |
120 static const char * const _names[last_stat_id]; | |
121 }; | |
122 | |
123 void TaskQueueStats::record_steal(bool success) { | |
124 ++_stats[steal_attempt]; | |
125 if (success) ++_stats[steal]; | |
126 } | |
127 | |
128 void TaskQueueStats::record_overflow(size_t new_len) { | |
129 ++_stats[overflow]; | |
130 if (new_len > _stats[overflow_max_len]) _stats[overflow_max_len] = new_len; | |
131 } | |
132 | |
133 void TaskQueueStats::reset() { | |
134 memset(_stats, 0, sizeof(_stats)); | |
135 } | |
136 #endif // TASKQUEUE_STATS | |
137 | |
12087
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
138 // TaskQueueSuper collects functionality common to all GenericTaskQueue instances. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
139 |
6197 | 140 template <unsigned int N, MEMFLAGS F> |
141 class TaskQueueSuper: public CHeapObj<F> { | |
0 | 142 protected: |
907 | 143 // Internal type for indexing the queue; also used for the tag. |
144 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; | |
145 | |
146 // The first free element after the last one pushed (mod N). | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
147 volatile uint _bottom; |
0 | 148 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
149 enum { MOD_N_MASK = N - 1 }; |
907 | 150 |
151 class Age { | |
152 public: | |
153 Age(size_t data = 0) { _data = data; } | |
154 Age(const Age& age) { _data = age._data; } | |
155 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } | |
0 | 156 |
907 | 157 Age get() const volatile { return _data; } |
158 void set(Age age) volatile { _data = age._data; } | |
159 | |
160 idx_t top() const volatile { return _fields._top; } | |
161 idx_t tag() const volatile { return _fields._tag; } | |
0 | 162 |
907 | 163 // Increment top; if it wraps, increment tag also. |
164 void increment() { | |
165 _fields._top = increment_index(_fields._top); | |
166 if (_fields._top == 0) ++_fields._tag; | |
167 } | |
0 | 168 |
907 | 169 Age cmpxchg(const Age new_age, const Age old_age) volatile { |
170 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, | |
171 (volatile intptr_t *)&_data, | |
172 (intptr_t)old_age._data); | |
173 } | |
174 | |
175 bool operator ==(const Age& other) const { return _data == other._data; } | |
0 | 176 |
907 | 177 private: |
178 struct fields { | |
179 idx_t _top; | |
180 idx_t _tag; | |
181 }; | |
182 union { | |
183 size_t _data; | |
184 fields _fields; | |
185 }; | |
0 | 186 }; |
907 | 187 |
188 volatile Age _age; | |
189 | |
190 // These both operate mod N. | |
191 static uint increment_index(uint ind) { | |
192 return (ind + 1) & MOD_N_MASK; | |
0 | 193 } |
907 | 194 static uint decrement_index(uint ind) { |
195 return (ind - 1) & MOD_N_MASK; | |
0 | 196 } |
197 | |
907 | 198 // Returns a number in the range [0..N). If the result is "N-1", it should be |
199 // interpreted as 0. | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
200 uint dirty_size(uint bot, uint top) const { |
907 | 201 return (bot - top) & MOD_N_MASK; |
0 | 202 } |
203 | |
204 // Returns the size corresponding to the given "bot" and "top". | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
205 uint size(uint bot, uint top) const { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
206 uint sz = dirty_size(bot, top); |
907 | 207 // Has the queue "wrapped", so that bottom is less than top? There's a |
208 // complicated special case here. A pair of threads could perform pop_local | |
209 // and pop_global operations concurrently, starting from a state in which | |
210 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, | |
211 // and the pop_global in incrementing _top (in which case the pop_global | |
212 // will be awarded the contested queue element.) The resulting state must | |
213 // be interpreted as an empty queue. (We only need to worry about one such | |
214 // event: only the queue owner performs pop_local's, and several concurrent | |
215 // threads attempting to perform the pop_global will all perform the same | |
216 // CAS, and only one can succeed.) Any stealing thread that reads after | |
217 // either the increment or decrement will see an empty queue, and will not | |
218 // join the competitors. The "sz == -1 || sz == N-1" state will not be | |
219 // modified by concurrent queues, so the owner thread can reset the state to | |
220 // _bottom == top so subsequent pushes will be performed normally. | |
221 return (sz == N - 1) ? 0 : sz; | |
0 | 222 } |
223 | |
224 public: | |
225 TaskQueueSuper() : _bottom(0), _age() {} | |
226 | |
1638 | 227 // Return true if the TaskQueue contains/does not contain any tasks. |
228 bool peek() const { return _bottom != _age.top(); } | |
229 bool is_empty() const { return size() == 0; } | |
0 | 230 |
231 // Return an estimate of the number of elements in the queue. | |
232 // The "careful" version admits the possibility of pop_local/pop_global | |
233 // races. | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
234 uint size() const { |
907 | 235 return size(_bottom, _age.top()); |
0 | 236 } |
237 | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
238 uint dirty_size() const { |
907 | 239 return dirty_size(_bottom, _age.top()); |
0 | 240 } |
241 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
242 void set_empty() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
243 _bottom = 0; |
907 | 244 _age.set(0); |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
245 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
246 |
0 | 247 // Maximum number of elements allowed in the queue. This is two less |
248 // than the actual queue size, for somewhat complicated reasons. | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
249 uint max_elems() const { return N - 2; } |
1284 | 250 |
251 // Total size of queue. | |
252 static const uint total_size() { return N; } | |
1665 | 253 |
254 TASKQUEUE_STATS_ONLY(TaskQueueStats stats;) | |
0 | 255 }; |
256 | |
12087
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
257 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
258 // GenericTaskQueue implements an ABP, Aurora-Blumofe-Plaxton, double- |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
259 // ended-queue (deque), intended for use in work stealing. Queue operations |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
260 // are non-blocking. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
261 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
262 // A queue owner thread performs push() and pop_local() operations on one end |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
263 // of the queue, while other threads may steal work using the pop_global() |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
264 // method. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
265 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
266 // The main difference to the original algorithm is that this |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
267 // implementation allows wrap-around at the end of its allocated |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
268 // storage, which is an array. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
269 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
270 // The original paper is: |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
271 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
272 // Arora, N. S., Blumofe, R. D., and Plaxton, C. G. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
273 // Thread scheduling for multiprogrammed multiprocessors. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
274 // Theory of Computing Systems 34, 2 (2001), 115-144. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
275 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
276 // The following paper provides an correctness proof and an |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
277 // implementation for weakly ordered memory models including (pseudo-) |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
278 // code containing memory barriers for a Chase-Lev deque. Chase-Lev is |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
279 // similar to ABP, with the main difference that it allows resizing of the |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
280 // underlying storage: |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
281 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
282 // Le, N. M., Pop, A., Cohen A., and Nardell, F. Z. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
283 // Correct and efficient work-stealing for weak memory models |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
284 // Proceedings of the 18th ACM SIGPLAN symposium on Principles and |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
285 // practice of parallel programming (PPoPP 2013), 69-80 |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
286 // |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
287 |
6197 | 288 template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE> |
289 class GenericTaskQueue: public TaskQueueSuper<N, F> { | |
9073
83f27710f5f7
7197666: java -d64 -version core dumps in a box with lots of memory
brutisso
parents:
6924
diff
changeset
|
290 ArrayAllocator<E, F> _array_allocator; |
6197 | 291 protected: |
292 typedef typename TaskQueueSuper<N, F>::Age Age; | |
293 typedef typename TaskQueueSuper<N, F>::idx_t idx_t; | |
294 | |
295 using TaskQueueSuper<N, F>::_bottom; | |
296 using TaskQueueSuper<N, F>::_age; | |
297 using TaskQueueSuper<N, F>::increment_index; | |
298 using TaskQueueSuper<N, F>::decrement_index; | |
299 using TaskQueueSuper<N, F>::dirty_size; | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
300 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
301 public: |
6197 | 302 using TaskQueueSuper<N, F>::max_elems; |
303 using TaskQueueSuper<N, F>::size; | |
304 | |
305 #if TASKQUEUE_STATS | |
306 using TaskQueueSuper<N, F>::stats; | |
307 #endif | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
308 |
0 | 309 private: |
310 // Slow paths for push, pop_local. (pop_global has no fast path.) | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
311 bool push_slow(E t, uint dirty_n_elems); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
312 bool pop_local_slow(uint localBot, Age oldAge); |
0 | 313 |
314 public: | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
315 typedef E element_type; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
316 |
0 | 317 // Initializes the queue to empty. |
318 GenericTaskQueue(); | |
319 | |
320 void initialize(); | |
321 | |
1638 | 322 // Push the task "t" on the queue. Returns "false" iff the queue is full. |
0 | 323 inline bool push(E t); |
324 | |
1638 | 325 // Attempts to claim a task from the "local" end of the queue (the most |
326 // recently pushed). If successful, returns true and sets t to the task; | |
327 // otherwise, returns false (the queue is empty). | |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
328 inline bool pop_local(volatile E& t); |
0 | 329 |
1638 | 330 // Like pop_local(), but uses the "global" end of the queue (the least |
331 // recently pushed). | |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
332 bool pop_global(volatile E& t); |
0 | 333 |
334 // Delete any resource associated with the queue. | |
335 ~GenericTaskQueue(); | |
336 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
337 // apply the closure to all elements in the task queue |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
338 void oops_do(OopClosure* f); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
339 |
0 | 340 private: |
341 // Element array. | |
342 volatile E* _elems; | |
343 }; | |
344 | |
6197 | 345 template<class E, MEMFLAGS F, unsigned int N> |
346 GenericTaskQueue<E, F, N>::GenericTaskQueue() { | |
907 | 347 assert(sizeof(Age) == sizeof(size_t), "Depends on this."); |
0 | 348 } |
349 | |
6197 | 350 template<class E, MEMFLAGS F, unsigned int N> |
351 void GenericTaskQueue<E, F, N>::initialize() { | |
9073
83f27710f5f7
7197666: java -d64 -version core dumps in a box with lots of memory
brutisso
parents:
6924
diff
changeset
|
352 _elems = _array_allocator.allocate(N); |
0 | 353 } |
354 | |
6197 | 355 template<class E, MEMFLAGS F, unsigned int N> |
356 void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) { | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
357 // tty->print_cr("START OopTaskQueue::oops_do"); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
358 uint iters = size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
359 uint index = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
360 for (uint i = 0; i < iters; ++i) { |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
361 index = decrement_index(index); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
362 // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
363 // index, &_elems[index], _elems[index]); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
364 E* t = (E*)&_elems[index]; // cast away volatility |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
365 oop* p = (oop*)t; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
366 assert((*t)->is_oop_or_null(), "Not an oop or null"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
367 f->do_oop(p); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
368 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
369 // tty->print_cr("END OopTaskQueue::oops_do"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
370 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
371 |
6197 | 372 template<class E, MEMFLAGS F, unsigned int N> |
373 bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) { | |
907 | 374 if (dirty_n_elems == N - 1) { |
0 | 375 // Actually means 0, so do the push. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
376 uint localBot = _bottom; |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
377 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
378 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
379 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
380 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
381 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
382 (void)const_cast<E&>(_elems[localBot] = t); |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
383 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
1665 | 384 TASKQUEUE_STATS_ONLY(stats.record_push()); |
0 | 385 return true; |
907 | 386 } |
387 return false; | |
0 | 388 } |
389 | |
1833
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
390 // pop_local_slow() is done by the owning thread and is trying to |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
391 // get the last task in the queue. It will compete with pop_global() |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
392 // that will be used by other threads. The tag age is incremented |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
393 // whenever the queue goes empty which it will do here if this thread |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
394 // gets the last task or in pop_global() if the queue wraps (top == 0 |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
395 // and pop_global() succeeds, see pop_global()). |
6197 | 396 template<class E, MEMFLAGS F, unsigned int N> |
397 bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) { | |
0 | 398 // This queue was observed to contain exactly one element; either this |
399 // thread will claim it, or a competing "pop_global". In either case, | |
400 // the queue will be logically empty afterwards. Create a new Age value | |
401 // that represents the empty queue for the given value of "_bottom". (We | |
402 // must also increment "tag" because of the case where "bottom == 1", | |
403 // "top == 0". A pop_global could read the queue element in that case, | |
404 // then have the owner thread do a pop followed by another push. Without | |
405 // the incrementing of "tag", the pop_global's CAS could succeed, | |
406 // allowing it to believe it has claimed the stale element.) | |
907 | 407 Age newAge((idx_t)localBot, oldAge.tag() + 1); |
0 | 408 // Perhaps a competing pop_global has already incremented "top", in which |
409 // case it wins the element. | |
410 if (localBot == oldAge.top()) { | |
411 // No competing pop_global has yet incremented "top"; we'll try to | |
412 // install new_age, thus claiming the element. | |
907 | 413 Age tempAge = _age.cmpxchg(newAge, oldAge); |
0 | 414 if (tempAge == oldAge) { |
415 // We win. | |
907 | 416 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
1665 | 417 TASKQUEUE_STATS_ONLY(stats.record_pop_slow()); |
0 | 418 return true; |
419 } | |
420 } | |
907 | 421 // We lose; a completing pop_global gets the element. But the queue is empty |
422 // and top is greater than bottom. Fix this representation of the empty queue | |
423 // to become the canonical one. | |
424 _age.set(newAge); | |
425 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); | |
0 | 426 return false; |
427 } | |
428 | |
6197 | 429 template<class E, MEMFLAGS F, unsigned int N> |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
430 bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) { |
907 | 431 Age oldAge = _age.get(); |
11997 | 432 // Architectures with weak memory model require a barrier here |
433 // to guarantee that bottom is not older than age, | |
434 // which is crucial for the correctness of the algorithm. | |
435 #if !(defined SPARC || defined IA32 || defined AMD64) | |
436 OrderAccess::fence(); | |
437 #endif | |
438 uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
439 uint n_elems = size(localBot, oldAge.top()); |
0 | 440 if (n_elems == 0) { |
441 return false; | |
442 } | |
907 | 443 |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
444 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
445 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
446 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
447 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
448 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
449 (void) const_cast<E&>(t = _elems[oldAge.top()]); |
907 | 450 Age newAge(oldAge); |
451 newAge.increment(); | |
452 Age resAge = _age.cmpxchg(newAge, oldAge); | |
453 | |
0 | 454 // Note that using "_bottom" here might fail, since a pop_local might |
455 // have decremented it. | |
907 | 456 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); |
457 return resAge == oldAge; | |
0 | 458 } |
459 | |
6197 | 460 template<class E, MEMFLAGS F, unsigned int N> |
461 GenericTaskQueue<E, F, N>::~GenericTaskQueue() { | |
462 FREE_C_HEAP_ARRAY(E, _elems, F); | |
0 | 463 } |
464 | |
1638 | 465 // OverflowTaskQueue is a TaskQueue that also includes an overflow stack for |
466 // elements that do not fit in the TaskQueue. | |
467 // | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
468 // This class hides two methods from super classes: |
1638 | 469 // |
470 // push() - push onto the task queue or, if that fails, onto the overflow stack | |
471 // is_empty() - return true if both the TaskQueue and overflow stack are empty | |
472 // | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
473 // Note that size() is not hidden--it returns the number of elements in the |
1638 | 474 // TaskQueue, and does not include the size of the overflow stack. This |
475 // simplifies replacement of GenericTaskQueues with OverflowTaskQueues. | |
6197 | 476 template<class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE> |
477 class OverflowTaskQueue: public GenericTaskQueue<E, F, N> | |
1638 | 478 { |
479 public: | |
6197 | 480 typedef Stack<E, F> overflow_t; |
481 typedef GenericTaskQueue<E, F, N> taskqueue_t; | |
1638 | 482 |
1665 | 483 TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;) |
484 | |
1638 | 485 // Push task t onto the queue or onto the overflow stack. Return true. |
486 inline bool push(E t); | |
487 | |
488 // Attempt to pop from the overflow stack; return true if anything was popped. | |
489 inline bool pop_overflow(E& t); | |
490 | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
491 inline overflow_t* overflow_stack() { return &_overflow_stack; } |
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
492 |
1638 | 493 inline bool taskqueue_empty() const { return taskqueue_t::is_empty(); } |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
494 inline bool overflow_empty() const { return _overflow_stack.is_empty(); } |
1638 | 495 inline bool is_empty() const { |
496 return taskqueue_empty() && overflow_empty(); | |
497 } | |
498 | |
499 private: | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
500 overflow_t _overflow_stack; |
1638 | 501 }; |
502 | |
6197 | 503 template <class E, MEMFLAGS F, unsigned int N> |
504 bool OverflowTaskQueue<E, F, N>::push(E t) | |
1638 | 505 { |
506 if (!taskqueue_t::push(t)) { | |
507 overflow_stack()->push(t); | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
508 TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size())); |
1638 | 509 } |
510 return true; | |
511 } | |
512 | |
6197 | 513 template <class E, MEMFLAGS F, unsigned int N> |
514 bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t) | |
1638 | 515 { |
516 if (overflow_empty()) return false; | |
517 t = overflow_stack()->pop(); | |
518 return true; | |
519 } | |
520 | |
6197 | 521 class TaskQueueSetSuper { |
0 | 522 protected: |
523 static int randomParkAndMiller(int* seed0); | |
524 public: | |
525 // Returns "true" if some TaskQueue in the set contains a task. | |
526 virtual bool peek() = 0; | |
527 }; | |
528 | |
6197 | 529 template <MEMFLAGS F> class TaskQueueSetSuperImpl: public CHeapObj<F>, public TaskQueueSetSuper { |
530 }; | |
531 | |
532 template<class T, MEMFLAGS F> | |
533 class GenericTaskQueueSet: public TaskQueueSetSuperImpl<F> { | |
0 | 534 private: |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
535 uint _n; |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
536 T** _queues; |
0 | 537 |
538 public: | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
539 typedef typename T::element_type E; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
540 |
0 | 541 GenericTaskQueueSet(int n) : _n(n) { |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
542 typedef T* GenericTaskQueuePtr; |
6197 | 543 _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F); |
0 | 544 for (int i = 0; i < n; i++) { |
545 _queues[i] = NULL; | |
546 } | |
547 } | |
548 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
549 bool steal_best_of_2(uint queue_num, int* seed, E& t); |
0 | 550 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
551 void register_queue(uint i, T* q); |
0 | 552 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
553 T* queue(uint n); |
0 | 554 |
1638 | 555 // The thread with queue number "queue_num" (and whose random number seed is |
556 // at "seed") is trying to steal a task from some other queue. (It may try | |
557 // several queues, according to some configuration parameter.) If some steal | |
558 // succeeds, returns "true" and sets "t" to the stolen task, otherwise returns | |
559 // false. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
560 bool steal(uint queue_num, int* seed, E& t); |
0 | 561 |
562 bool peek(); | |
563 }; | |
564 | |
6197 | 565 template<class T, MEMFLAGS F> void |
566 GenericTaskQueueSet<T, F>::register_queue(uint i, T* q) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
567 assert(i < _n, "index out of range."); |
0 | 568 _queues[i] = q; |
569 } | |
570 | |
6197 | 571 template<class T, MEMFLAGS F> T* |
572 GenericTaskQueueSet<T, F>::queue(uint i) { | |
0 | 573 return _queues[i]; |
574 } | |
575 | |
6197 | 576 template<class T, MEMFLAGS F> bool |
577 GenericTaskQueueSet<T, F>::steal(uint queue_num, int* seed, E& t) { | |
1665 | 578 for (uint i = 0; i < 2 * _n; i++) { |
579 if (steal_best_of_2(queue_num, seed, t)) { | |
580 TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true)); | |
0 | 581 return true; |
1665 | 582 } |
583 } | |
584 TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false)); | |
0 | 585 return false; |
586 } | |
587 | |
6197 | 588 template<class T, MEMFLAGS F> bool |
589 GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) { | |
0 | 590 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
591 uint k1 = queue_num; |
6197 | 592 while (k1 == queue_num) k1 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
593 uint k2 = queue_num; |
6197 | 594 while (k2 == queue_num || k2 == k1) k2 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n; |
0 | 595 // Sample both and try the larger. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
596 uint sz1 = _queues[k1]->size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
597 uint sz2 = _queues[k2]->size(); |
0 | 598 if (sz2 > sz1) return _queues[k2]->pop_global(t); |
599 else return _queues[k1]->pop_global(t); | |
600 } else if (_n == 2) { | |
601 // Just try the other one. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
602 uint k = (queue_num + 1) % 2; |
0 | 603 return _queues[k]->pop_global(t); |
604 } else { | |
605 assert(_n == 1, "can't be zero."); | |
606 return false; | |
607 } | |
608 } | |
609 | |
6197 | 610 template<class T, MEMFLAGS F> |
611 bool GenericTaskQueueSet<T, F>::peek() { | |
0 | 612 // Try all the queues. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
613 for (uint j = 0; j < _n; j++) { |
0 | 614 if (_queues[j]->peek()) |
615 return true; | |
616 } | |
617 return false; | |
618 } | |
619 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
620 // When to terminate from the termination protocol. |
6197 | 621 class TerminatorTerminator: public CHeapObj<mtInternal> { |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
622 public: |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
623 virtual bool should_exit_termination() = 0; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
624 }; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
625 |
0 | 626 // A class to aid in the termination of a set of parallel tasks using |
627 // TaskQueueSet's for work stealing. | |
628 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
629 #undef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
630 |
0 | 631 class ParallelTaskTerminator: public StackObj { |
632 private: | |
633 int _n_threads; | |
634 TaskQueueSetSuper* _queue_set; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
635 int _offered_termination; |
0 | 636 |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
637 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
638 static uint _total_yields; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
639 static uint _total_spins; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
640 static uint _total_peeks; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
641 #endif |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
642 |
0 | 643 bool peek_in_queue_set(); |
644 protected: | |
645 virtual void yield(); | |
646 void sleep(uint millis); | |
647 | |
648 public: | |
649 | |
650 // "n_threads" is the number of threads to be terminated. "queue_set" is a | |
651 // queue sets of work queues of other threads. | |
652 ParallelTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set); | |
653 | |
654 // The current thread has no work, and is ready to terminate if everyone | |
655 // else is. If returns "true", all threads are terminated. If returns | |
656 // "false", available work has been observed in one of the task queues, | |
657 // so the global task is not complete. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
658 bool offer_termination() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
659 return offer_termination(NULL); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
660 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
661 |
907 | 662 // As above, but it also terminates if the should_exit_termination() |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
663 // method of the terminator parameter returns true. If terminator is |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
664 // NULL, then it is ignored. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
665 bool offer_termination(TerminatorTerminator* terminator); |
0 | 666 |
667 // Reset the terminator, so that it may be reused again. | |
668 // The caller is responsible for ensuring that this is done | |
669 // in an MT-safe manner, once the previous round of use of | |
670 // the terminator is finished. | |
671 void reset_for_reuse(); | |
1833
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
672 // Same as above but the number of parallel threads is set to the |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
673 // given number. |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
674 void reset_for_reuse(int n_threads); |
0 | 675 |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
676 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
677 static uint total_yields() { return _total_yields; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
678 static uint total_spins() { return _total_spins; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
679 static uint total_peeks() { return _total_peeks; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
680 static void print_termination_counts(); |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
681 #endif |
0 | 682 }; |
683 | |
6197 | 684 template<class E, MEMFLAGS F, unsigned int N> inline bool |
685 GenericTaskQueue<E, F, N>::push(E t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
686 uint localBot = _bottom; |
11997 | 687 assert(localBot < N, "_bottom out of range."); |
907 | 688 idx_t top = _age.top(); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
689 uint dirty_n_elems = dirty_size(localBot, top); |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
690 assert(dirty_n_elems < N, "n_elems out of range."); |
0 | 691 if (dirty_n_elems < max_elems()) { |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
692 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
693 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
694 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
695 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
696 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
697 (void) const_cast<E&>(_elems[localBot] = t); |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
698 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
1665 | 699 TASKQUEUE_STATS_ONLY(stats.record_push()); |
0 | 700 return true; |
701 } else { | |
702 return push_slow(t, dirty_n_elems); | |
703 } | |
704 } | |
705 | |
6197 | 706 template<class E, MEMFLAGS F, unsigned int N> inline bool |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
707 GenericTaskQueue<E, F, N>::pop_local(volatile E& t) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
708 uint localBot = _bottom; |
907 | 709 // This value cannot be N-1. That can only occur as a result of |
0 | 710 // the assignment to bottom in this method. If it does, this method |
1638 | 711 // resets the size to 0 before the next call (which is sequential, |
0 | 712 // since this is pop_local.) |
907 | 713 uint dirty_n_elems = dirty_size(localBot, _age.top()); |
714 assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); | |
0 | 715 if (dirty_n_elems == 0) return false; |
716 localBot = decrement_index(localBot); | |
717 _bottom = localBot; | |
718 // This is necessary to prevent any read below from being reordered | |
719 // before the store just above. | |
720 OrderAccess::fence(); | |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
721 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
722 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
723 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
724 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
725 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
726 (void) const_cast<E&>(t = _elems[localBot]); |
0 | 727 // This is a second read of "age"; the "size()" above is the first. |
728 // If there's still at least one element in the queue, based on the | |
729 // "_bottom" and "age" we've read, then there can be no interference with | |
730 // a "pop_global" operation, and we're done. | |
907 | 731 idx_t tp = _age.top(); // XXX |
0 | 732 if (size(localBot, tp) > 0) { |
907 | 733 assert(dirty_size(localBot, tp) != N - 1, "sanity"); |
1665 | 734 TASKQUEUE_STATS_ONLY(stats.record_pop()); |
0 | 735 return true; |
736 } else { | |
737 // Otherwise, the queue contained exactly one element; we take the slow | |
738 // path. | |
907 | 739 return pop_local_slow(localBot, _age.get()); |
0 | 740 } |
741 } | |
742 | |
6197 | 743 typedef GenericTaskQueue<oop, mtGC> OopTaskQueue; |
744 typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet; | |
0 | 745 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
746 #ifdef _MSC_VER |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
747 #pragma warning(push) |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
748 // warning C4522: multiple assignment operators specified |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
749 #pragma warning(disable:4522) |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
750 #endif |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
751 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
752 // This is a container class for either an oop* or a narrowOop*. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
753 // Both are pushed onto a task queue and the consumer will test is_narrow() |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
754 // to determine which should be processed. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
755 class StarTask { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
756 void* _holder; // either union oop* or narrowOop* |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
757 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
758 enum { COMPRESSED_OOP_MASK = 1 }; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
759 |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
760 public: |
845
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
761 StarTask(narrowOop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
762 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
763 _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
764 } |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
765 StarTask(oop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
766 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
767 _holder = (void*)p; |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
768 } |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
769 StarTask() { _holder = NULL; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
770 operator oop*() { return (oop*)_holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
771 operator narrowOop*() { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
772 return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
773 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
774 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
775 StarTask& operator=(const StarTask& t) { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
776 _holder = t._holder; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
777 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
778 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
779 volatile StarTask& operator=(const volatile StarTask& t) volatile { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
780 _holder = t._holder; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
781 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
782 } |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
783 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
784 bool is_narrow() const { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
785 return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
786 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
787 }; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
788 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
789 class ObjArrayTask |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
790 { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
791 public: |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
792 ObjArrayTask(oop o = NULL, int idx = 0): _obj(o), _index(idx) { } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
793 ObjArrayTask(oop o, size_t idx): _obj(o), _index(int(idx)) { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
794 assert(idx <= size_t(max_jint), "too big"); |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
795 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
796 ObjArrayTask(const ObjArrayTask& t): _obj(t._obj), _index(t._index) { } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
797 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
798 ObjArrayTask& operator =(const ObjArrayTask& t) { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
799 _obj = t._obj; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
800 _index = t._index; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
801 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
802 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
803 volatile ObjArrayTask& |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
804 operator =(const volatile ObjArrayTask& t) volatile { |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
805 (void)const_cast<oop&>(_obj = t._obj); |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
806 _index = t._index; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
807 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
808 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
809 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
810 inline oop obj() const { return _obj; } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
811 inline int index() const { return _index; } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
812 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
813 DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
814 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
815 private: |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
816 oop _obj; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
817 int _index; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
818 }; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
819 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
820 #ifdef _MSC_VER |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
821 #pragma warning(pop) |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
822 #endif |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
823 |
6197 | 824 typedef OverflowTaskQueue<StarTask, mtClass> OopStarTaskQueue; |
825 typedef GenericTaskQueueSet<OopStarTaskQueue, mtClass> OopStarTaskQueueSet; | |
0 | 826 |
6197 | 827 typedef OverflowTaskQueue<size_t, mtInternal> RegionTaskQueue; |
828 typedef GenericTaskQueueSet<RegionTaskQueue, mtClass> RegionTaskQueueSet; | |
1833
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
829 |
1972 | 830 |
831 #endif // SHARE_VM_UTILITIES_TASKQUEUE_HPP |