Mercurial > hg > truffle
annotate src/share/vm/utilities/taskqueue.hpp @ 20543:e7d0505c8a30
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything.
Reviewed-by: jwilhelm, brutisso
author | tschatzl |
---|---|
date | Fri, 10 Oct 2014 15:51:58 +0200 |
parents | ce8f6bb717c9 |
children | 7848fc12602b |
rev | line source |
---|---|
0 | 1 /* |
12087
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1311
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1311
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1311
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #ifndef SHARE_VM_UTILITIES_TASKQUEUE_HPP |
26 #define SHARE_VM_UTILITIES_TASKQUEUE_HPP | |
27 | |
28 #include "memory/allocation.hpp" | |
29 #include "memory/allocation.inline.hpp" | |
30 #include "runtime/mutex.hpp" | |
20197
ce8f6bb717c9
8042195: Introduce umbrella header orderAccess.inline.hpp.
goetz
parents:
14422
diff
changeset
|
31 #include "runtime/orderAccess.inline.hpp" |
1972 | 32 #include "utilities/stack.hpp" |
33 | |
1665 | 34 // Simple TaskQueue stats that are collected by default in debug builds. |
35 | |
36 #if !defined(TASKQUEUE_STATS) && defined(ASSERT) | |
37 #define TASKQUEUE_STATS 1 | |
38 #elif !defined(TASKQUEUE_STATS) | |
39 #define TASKQUEUE_STATS 0 | |
40 #endif | |
41 | |
42 #if TASKQUEUE_STATS | |
43 #define TASKQUEUE_STATS_ONLY(code) code | |
44 #else | |
45 #define TASKQUEUE_STATS_ONLY(code) | |
46 #endif // TASKQUEUE_STATS | |
47 | |
48 #if TASKQUEUE_STATS | |
49 class TaskQueueStats { | |
50 public: | |
51 enum StatId { | |
52 push, // number of taskqueue pushes | |
53 pop, // number of taskqueue pops | |
54 pop_slow, // subset of taskqueue pops that were done slow-path | |
55 steal_attempt, // number of taskqueue steal attempts | |
56 steal, // number of taskqueue steals | |
57 overflow, // number of overflow pushes | |
58 overflow_max_len, // max length of overflow stack | |
59 last_stat_id | |
60 }; | |
61 | |
62 public: | |
63 inline TaskQueueStats() { reset(); } | |
64 | |
65 inline void record_push() { ++_stats[push]; } | |
66 inline void record_pop() { ++_stats[pop]; } | |
67 inline void record_pop_slow() { record_pop(); ++_stats[pop_slow]; } | |
68 inline void record_steal(bool success); | |
69 inline void record_overflow(size_t new_length); | |
70 | |
1709 | 71 TaskQueueStats & operator +=(const TaskQueueStats & addend); |
72 | |
1665 | 73 inline size_t get(StatId id) const { return _stats[id]; } |
74 inline const size_t* get() const { return _stats; } | |
75 | |
76 inline void reset(); | |
77 | |
1709 | 78 // Print the specified line of the header (does not include a line separator). |
1665 | 79 static void print_header(unsigned int line, outputStream* const stream = tty, |
80 unsigned int width = 10); | |
1709 | 81 // Print the statistics (does not include a line separator). |
1665 | 82 void print(outputStream* const stream = tty, unsigned int width = 10) const; |
83 | |
1709 | 84 DEBUG_ONLY(void verify() const;) |
85 | |
1665 | 86 private: |
87 size_t _stats[last_stat_id]; | |
88 static const char * const _names[last_stat_id]; | |
89 }; | |
90 | |
91 void TaskQueueStats::record_steal(bool success) { | |
92 ++_stats[steal_attempt]; | |
93 if (success) ++_stats[steal]; | |
94 } | |
95 | |
96 void TaskQueueStats::record_overflow(size_t new_len) { | |
97 ++_stats[overflow]; | |
98 if (new_len > _stats[overflow_max_len]) _stats[overflow_max_len] = new_len; | |
99 } | |
100 | |
101 void TaskQueueStats::reset() { | |
102 memset(_stats, 0, sizeof(_stats)); | |
103 } | |
104 #endif // TASKQUEUE_STATS | |
105 | |
12087
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
106 // TaskQueueSuper collects functionality common to all GenericTaskQueue instances. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
107 |
6197 | 108 template <unsigned int N, MEMFLAGS F> |
109 class TaskQueueSuper: public CHeapObj<F> { | |
0 | 110 protected: |
907 | 111 // Internal type for indexing the queue; also used for the tag. |
112 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; | |
113 | |
114 // The first free element after the last one pushed (mod N). | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
115 volatile uint _bottom; |
0 | 116 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
117 enum { MOD_N_MASK = N - 1 }; |
907 | 118 |
119 class Age { | |
120 public: | |
121 Age(size_t data = 0) { _data = data; } | |
122 Age(const Age& age) { _data = age._data; } | |
123 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } | |
0 | 124 |
907 | 125 Age get() const volatile { return _data; } |
126 void set(Age age) volatile { _data = age._data; } | |
127 | |
128 idx_t top() const volatile { return _fields._top; } | |
129 idx_t tag() const volatile { return _fields._tag; } | |
0 | 130 |
907 | 131 // Increment top; if it wraps, increment tag also. |
132 void increment() { | |
133 _fields._top = increment_index(_fields._top); | |
134 if (_fields._top == 0) ++_fields._tag; | |
135 } | |
0 | 136 |
907 | 137 Age cmpxchg(const Age new_age, const Age old_age) volatile { |
138 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, | |
139 (volatile intptr_t *)&_data, | |
140 (intptr_t)old_age._data); | |
141 } | |
142 | |
143 bool operator ==(const Age& other) const { return _data == other._data; } | |
0 | 144 |
907 | 145 private: |
146 struct fields { | |
147 idx_t _top; | |
148 idx_t _tag; | |
149 }; | |
150 union { | |
151 size_t _data; | |
152 fields _fields; | |
153 }; | |
0 | 154 }; |
907 | 155 |
156 volatile Age _age; | |
157 | |
158 // These both operate mod N. | |
159 static uint increment_index(uint ind) { | |
160 return (ind + 1) & MOD_N_MASK; | |
0 | 161 } |
907 | 162 static uint decrement_index(uint ind) { |
163 return (ind - 1) & MOD_N_MASK; | |
0 | 164 } |
165 | |
907 | 166 // Returns a number in the range [0..N). If the result is "N-1", it should be |
167 // interpreted as 0. | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
168 uint dirty_size(uint bot, uint top) const { |
907 | 169 return (bot - top) & MOD_N_MASK; |
0 | 170 } |
171 | |
172 // Returns the size corresponding to the given "bot" and "top". | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
173 uint size(uint bot, uint top) const { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
174 uint sz = dirty_size(bot, top); |
907 | 175 // Has the queue "wrapped", so that bottom is less than top? There's a |
176 // complicated special case here. A pair of threads could perform pop_local | |
177 // and pop_global operations concurrently, starting from a state in which | |
178 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, | |
179 // and the pop_global in incrementing _top (in which case the pop_global | |
180 // will be awarded the contested queue element.) The resulting state must | |
181 // be interpreted as an empty queue. (We only need to worry about one such | |
182 // event: only the queue owner performs pop_local's, and several concurrent | |
183 // threads attempting to perform the pop_global will all perform the same | |
184 // CAS, and only one can succeed.) Any stealing thread that reads after | |
185 // either the increment or decrement will see an empty queue, and will not | |
186 // join the competitors. The "sz == -1 || sz == N-1" state will not be | |
187 // modified by concurrent queues, so the owner thread can reset the state to | |
188 // _bottom == top so subsequent pushes will be performed normally. | |
189 return (sz == N - 1) ? 0 : sz; | |
0 | 190 } |
191 | |
192 public: | |
193 TaskQueueSuper() : _bottom(0), _age() {} | |
194 | |
1638 | 195 // Return true if the TaskQueue contains/does not contain any tasks. |
196 bool peek() const { return _bottom != _age.top(); } | |
197 bool is_empty() const { return size() == 0; } | |
0 | 198 |
199 // Return an estimate of the number of elements in the queue. | |
200 // The "careful" version admits the possibility of pop_local/pop_global | |
201 // races. | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
202 uint size() const { |
907 | 203 return size(_bottom, _age.top()); |
0 | 204 } |
205 | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
206 uint dirty_size() const { |
907 | 207 return dirty_size(_bottom, _age.top()); |
0 | 208 } |
209 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
210 void set_empty() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
211 _bottom = 0; |
907 | 212 _age.set(0); |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
213 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
214 |
0 | 215 // Maximum number of elements allowed in the queue. This is two less |
216 // than the actual queue size, for somewhat complicated reasons. | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
217 uint max_elems() const { return N - 2; } |
1284 | 218 |
219 // Total size of queue. | |
220 static const uint total_size() { return N; } | |
1665 | 221 |
222 TASKQUEUE_STATS_ONLY(TaskQueueStats stats;) | |
0 | 223 }; |
224 | |
12087
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
225 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
226 // GenericTaskQueue implements an ABP, Aurora-Blumofe-Plaxton, double- |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
227 // ended-queue (deque), intended for use in work stealing. Queue operations |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
228 // are non-blocking. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
229 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
230 // A queue owner thread performs push() and pop_local() operations on one end |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
231 // of the queue, while other threads may steal work using the pop_global() |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
232 // method. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
233 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
234 // The main difference to the original algorithm is that this |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
235 // implementation allows wrap-around at the end of its allocated |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
236 // storage, which is an array. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
237 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
238 // The original paper is: |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
239 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
240 // Arora, N. S., Blumofe, R. D., and Plaxton, C. G. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
241 // Thread scheduling for multiprogrammed multiprocessors. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
242 // Theory of Computing Systems 34, 2 (2001), 115-144. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
243 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
244 // The following paper provides an correctness proof and an |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
245 // implementation for weakly ordered memory models including (pseudo-) |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
246 // code containing memory barriers for a Chase-Lev deque. Chase-Lev is |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
247 // similar to ABP, with the main difference that it allows resizing of the |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
248 // underlying storage: |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
249 // |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
250 // Le, N. M., Pop, A., Cohen A., and Nardell, F. Z. |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
251 // Correct and efficient work-stealing for weak memory models |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
252 // Proceedings of the 18th ACM SIGPLAN symposium on Principles and |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
253 // practice of parallel programming (PPoPP 2013), 69-80 |
61521bd65100
8022784: TaskQueue misses minimal documentation and references for analysis
tschatzl
parents:
11997
diff
changeset
|
254 // |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
255 |
6197 | 256 template <class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE> |
257 class GenericTaskQueue: public TaskQueueSuper<N, F> { | |
9073
83f27710f5f7
7197666: java -d64 -version core dumps in a box with lots of memory
brutisso
parents:
6924
diff
changeset
|
258 ArrayAllocator<E, F> _array_allocator; |
6197 | 259 protected: |
260 typedef typename TaskQueueSuper<N, F>::Age Age; | |
261 typedef typename TaskQueueSuper<N, F>::idx_t idx_t; | |
262 | |
263 using TaskQueueSuper<N, F>::_bottom; | |
264 using TaskQueueSuper<N, F>::_age; | |
265 using TaskQueueSuper<N, F>::increment_index; | |
266 using TaskQueueSuper<N, F>::decrement_index; | |
267 using TaskQueueSuper<N, F>::dirty_size; | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
268 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
269 public: |
6197 | 270 using TaskQueueSuper<N, F>::max_elems; |
271 using TaskQueueSuper<N, F>::size; | |
272 | |
273 #if TASKQUEUE_STATS | |
274 using TaskQueueSuper<N, F>::stats; | |
275 #endif | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
276 |
0 | 277 private: |
278 // Slow paths for push, pop_local. (pop_global has no fast path.) | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
279 bool push_slow(E t, uint dirty_n_elems); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
280 bool pop_local_slow(uint localBot, Age oldAge); |
0 | 281 |
282 public: | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
283 typedef E element_type; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
284 |
0 | 285 // Initializes the queue to empty. |
286 GenericTaskQueue(); | |
287 | |
288 void initialize(); | |
289 | |
1638 | 290 // Push the task "t" on the queue. Returns "false" iff the queue is full. |
0 | 291 inline bool push(E t); |
292 | |
1638 | 293 // Attempts to claim a task from the "local" end of the queue (the most |
294 // recently pushed). If successful, returns true and sets t to the task; | |
295 // otherwise, returns false (the queue is empty). | |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
296 inline bool pop_local(volatile E& t); |
0 | 297 |
1638 | 298 // Like pop_local(), but uses the "global" end of the queue (the least |
299 // recently pushed). | |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
300 bool pop_global(volatile E& t); |
0 | 301 |
302 // Delete any resource associated with the queue. | |
303 ~GenericTaskQueue(); | |
304 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
305 // apply the closure to all elements in the task queue |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
306 void oops_do(OopClosure* f); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
307 |
0 | 308 private: |
309 // Element array. | |
310 volatile E* _elems; | |
311 }; | |
312 | |
6197 | 313 template<class E, MEMFLAGS F, unsigned int N> |
314 GenericTaskQueue<E, F, N>::GenericTaskQueue() { | |
907 | 315 assert(sizeof(Age) == sizeof(size_t), "Depends on this."); |
0 | 316 } |
317 | |
6197 | 318 template<class E, MEMFLAGS F, unsigned int N> |
319 void GenericTaskQueue<E, F, N>::initialize() { | |
9073
83f27710f5f7
7197666: java -d64 -version core dumps in a box with lots of memory
brutisso
parents:
6924
diff
changeset
|
320 _elems = _array_allocator.allocate(N); |
0 | 321 } |
322 | |
6197 | 323 template<class E, MEMFLAGS F, unsigned int N> |
324 void GenericTaskQueue<E, F, N>::oops_do(OopClosure* f) { | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
325 // tty->print_cr("START OopTaskQueue::oops_do"); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
326 uint iters = size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
327 uint index = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
328 for (uint i = 0; i < iters; ++i) { |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
329 index = decrement_index(index); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
330 // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
331 // index, &_elems[index], _elems[index]); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
332 E* t = (E*)&_elems[index]; // cast away volatility |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
333 oop* p = (oop*)t; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
334 assert((*t)->is_oop_or_null(), "Not an oop or null"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
335 f->do_oop(p); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
336 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
337 // tty->print_cr("END OopTaskQueue::oops_do"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
338 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
339 |
6197 | 340 template<class E, MEMFLAGS F, unsigned int N> |
341 bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) { | |
907 | 342 if (dirty_n_elems == N - 1) { |
0 | 343 // Actually means 0, so do the push. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
344 uint localBot = _bottom; |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
345 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
346 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
347 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
348 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
349 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
350 (void)const_cast<E&>(_elems[localBot] = t); |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
351 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
1665 | 352 TASKQUEUE_STATS_ONLY(stats.record_push()); |
0 | 353 return true; |
907 | 354 } |
355 return false; | |
0 | 356 } |
357 | |
1833
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
358 // pop_local_slow() is done by the owning thread and is trying to |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
359 // get the last task in the queue. It will compete with pop_global() |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
360 // that will be used by other threads. The tag age is incremented |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
361 // whenever the queue goes empty which it will do here if this thread |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
362 // gets the last task or in pop_global() if the queue wraps (top == 0 |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
363 // and pop_global() succeeds, see pop_global()). |
6197 | 364 template<class E, MEMFLAGS F, unsigned int N> |
365 bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) { | |
0 | 366 // This queue was observed to contain exactly one element; either this |
367 // thread will claim it, or a competing "pop_global". In either case, | |
368 // the queue will be logically empty afterwards. Create a new Age value | |
369 // that represents the empty queue for the given value of "_bottom". (We | |
370 // must also increment "tag" because of the case where "bottom == 1", | |
371 // "top == 0". A pop_global could read the queue element in that case, | |
372 // then have the owner thread do a pop followed by another push. Without | |
373 // the incrementing of "tag", the pop_global's CAS could succeed, | |
374 // allowing it to believe it has claimed the stale element.) | |
907 | 375 Age newAge((idx_t)localBot, oldAge.tag() + 1); |
0 | 376 // Perhaps a competing pop_global has already incremented "top", in which |
377 // case it wins the element. | |
378 if (localBot == oldAge.top()) { | |
379 // No competing pop_global has yet incremented "top"; we'll try to | |
380 // install new_age, thus claiming the element. | |
907 | 381 Age tempAge = _age.cmpxchg(newAge, oldAge); |
0 | 382 if (tempAge == oldAge) { |
383 // We win. | |
907 | 384 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
1665 | 385 TASKQUEUE_STATS_ONLY(stats.record_pop_slow()); |
0 | 386 return true; |
387 } | |
388 } | |
907 | 389 // We lose; a completing pop_global gets the element. But the queue is empty |
390 // and top is greater than bottom. Fix this representation of the empty queue | |
391 // to become the canonical one. | |
392 _age.set(newAge); | |
393 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); | |
0 | 394 return false; |
395 } | |
396 | |
6197 | 397 template<class E, MEMFLAGS F, unsigned int N> |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
398 bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) { |
907 | 399 Age oldAge = _age.get(); |
11997 | 400 // Architectures with weak memory model require a barrier here |
401 // to guarantee that bottom is not older than age, | |
402 // which is crucial for the correctness of the algorithm. | |
403 #if !(defined SPARC || defined IA32 || defined AMD64) | |
404 OrderAccess::fence(); | |
405 #endif | |
406 uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
407 uint n_elems = size(localBot, oldAge.top()); |
0 | 408 if (n_elems == 0) { |
409 return false; | |
410 } | |
907 | 411 |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
412 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
413 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
414 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
415 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
416 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
417 (void) const_cast<E&>(t = _elems[oldAge.top()]); |
907 | 418 Age newAge(oldAge); |
419 newAge.increment(); | |
420 Age resAge = _age.cmpxchg(newAge, oldAge); | |
421 | |
0 | 422 // Note that using "_bottom" here might fail, since a pop_local might |
423 // have decremented it. | |
907 | 424 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); |
425 return resAge == oldAge; | |
0 | 426 } |
427 | |
6197 | 428 template<class E, MEMFLAGS F, unsigned int N> |
429 GenericTaskQueue<E, F, N>::~GenericTaskQueue() { | |
430 FREE_C_HEAP_ARRAY(E, _elems, F); | |
0 | 431 } |
432 | |
1638 | 433 // OverflowTaskQueue is a TaskQueue that also includes an overflow stack for |
434 // elements that do not fit in the TaskQueue. | |
435 // | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
436 // This class hides two methods from super classes: |
1638 | 437 // |
438 // push() - push onto the task queue or, if that fails, onto the overflow stack | |
439 // is_empty() - return true if both the TaskQueue and overflow stack are empty | |
440 // | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
441 // Note that size() is not hidden--it returns the number of elements in the |
1638 | 442 // TaskQueue, and does not include the size of the overflow stack. This |
443 // simplifies replacement of GenericTaskQueues with OverflowTaskQueues. | |
6197 | 444 template<class E, MEMFLAGS F, unsigned int N = TASKQUEUE_SIZE> |
445 class OverflowTaskQueue: public GenericTaskQueue<E, F, N> | |
1638 | 446 { |
447 public: | |
6197 | 448 typedef Stack<E, F> overflow_t; |
449 typedef GenericTaskQueue<E, F, N> taskqueue_t; | |
1638 | 450 |
1665 | 451 TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;) |
452 | |
1638 | 453 // Push task t onto the queue or onto the overflow stack. Return true. |
454 inline bool push(E t); | |
455 | |
456 // Attempt to pop from the overflow stack; return true if anything was popped. | |
457 inline bool pop_overflow(E& t); | |
458 | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
459 inline overflow_t* overflow_stack() { return &_overflow_stack; } |
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
460 |
1638 | 461 inline bool taskqueue_empty() const { return taskqueue_t::is_empty(); } |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
462 inline bool overflow_empty() const { return _overflow_stack.is_empty(); } |
1638 | 463 inline bool is_empty() const { |
464 return taskqueue_empty() && overflow_empty(); | |
465 } | |
466 | |
467 private: | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
468 overflow_t _overflow_stack; |
1638 | 469 }; |
470 | |
6197 | 471 template <class E, MEMFLAGS F, unsigned int N> |
472 bool OverflowTaskQueue<E, F, N>::push(E t) | |
1638 | 473 { |
474 if (!taskqueue_t::push(t)) { | |
475 overflow_stack()->push(t); | |
1836
894b1d7c7e01
6423256: GC stacks should use a better data structure
jcoomes
parents:
1833
diff
changeset
|
476 TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size())); |
1638 | 477 } |
478 return true; | |
479 } | |
480 | |
6197 | 481 template <class E, MEMFLAGS F, unsigned int N> |
482 bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t) | |
1638 | 483 { |
484 if (overflow_empty()) return false; | |
485 t = overflow_stack()->pop(); | |
486 return true; | |
487 } | |
488 | |
6197 | 489 class TaskQueueSetSuper { |
0 | 490 protected: |
491 static int randomParkAndMiller(int* seed0); | |
492 public: | |
493 // Returns "true" if some TaskQueue in the set contains a task. | |
494 virtual bool peek() = 0; | |
495 }; | |
496 | |
6197 | 497 template <MEMFLAGS F> class TaskQueueSetSuperImpl: public CHeapObj<F>, public TaskQueueSetSuper { |
498 }; | |
499 | |
500 template<class T, MEMFLAGS F> | |
501 class GenericTaskQueueSet: public TaskQueueSetSuperImpl<F> { | |
0 | 502 private: |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
503 uint _n; |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
504 T** _queues; |
0 | 505 |
506 public: | |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
507 typedef typename T::element_type E; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
508 |
0 | 509 GenericTaskQueueSet(int n) : _n(n) { |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
510 typedef T* GenericTaskQueuePtr; |
6197 | 511 _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F); |
0 | 512 for (int i = 0; i < n; i++) { |
513 _queues[i] = NULL; | |
514 } | |
515 } | |
516 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
517 bool steal_best_of_2(uint queue_num, int* seed, E& t); |
0 | 518 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
519 void register_queue(uint i, T* q); |
0 | 520 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
521 T* queue(uint n); |
0 | 522 |
1638 | 523 // The thread with queue number "queue_num" (and whose random number seed is |
524 // at "seed") is trying to steal a task from some other queue. (It may try | |
525 // several queues, according to some configuration parameter.) If some steal | |
526 // succeeds, returns "true" and sets "t" to the stolen task, otherwise returns | |
527 // false. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
528 bool steal(uint queue_num, int* seed, E& t); |
0 | 529 |
530 bool peek(); | |
531 }; | |
532 | |
6197 | 533 template<class T, MEMFLAGS F> void |
534 GenericTaskQueueSet<T, F>::register_queue(uint i, T* q) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
535 assert(i < _n, "index out of range."); |
0 | 536 _queues[i] = q; |
537 } | |
538 | |
6197 | 539 template<class T, MEMFLAGS F> T* |
540 GenericTaskQueueSet<T, F>::queue(uint i) { | |
0 | 541 return _queues[i]; |
542 } | |
543 | |
6197 | 544 template<class T, MEMFLAGS F> bool |
545 GenericTaskQueueSet<T, F>::steal(uint queue_num, int* seed, E& t) { | |
1665 | 546 for (uint i = 0; i < 2 * _n; i++) { |
547 if (steal_best_of_2(queue_num, seed, t)) { | |
548 TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true)); | |
0 | 549 return true; |
1665 | 550 } |
551 } | |
552 TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false)); | |
0 | 553 return false; |
554 } | |
555 | |
6197 | 556 template<class T, MEMFLAGS F> bool |
557 GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) { | |
0 | 558 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
559 uint k1 = queue_num; |
6197 | 560 while (k1 == queue_num) k1 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
561 uint k2 = queue_num; |
6197 | 562 while (k2 == queue_num || k2 == k1) k2 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n; |
0 | 563 // Sample both and try the larger. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
564 uint sz1 = _queues[k1]->size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
565 uint sz2 = _queues[k2]->size(); |
0 | 566 if (sz2 > sz1) return _queues[k2]->pop_global(t); |
567 else return _queues[k1]->pop_global(t); | |
568 } else if (_n == 2) { | |
569 // Just try the other one. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
570 uint k = (queue_num + 1) % 2; |
0 | 571 return _queues[k]->pop_global(t); |
572 } else { | |
573 assert(_n == 1, "can't be zero."); | |
574 return false; | |
575 } | |
576 } | |
577 | |
6197 | 578 template<class T, MEMFLAGS F> |
579 bool GenericTaskQueueSet<T, F>::peek() { | |
0 | 580 // Try all the queues. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
581 for (uint j = 0; j < _n; j++) { |
0 | 582 if (_queues[j]->peek()) |
583 return true; | |
584 } | |
585 return false; | |
586 } | |
587 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
588 // When to terminate from the termination protocol. |
6197 | 589 class TerminatorTerminator: public CHeapObj<mtInternal> { |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
590 public: |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
591 virtual bool should_exit_termination() = 0; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
592 }; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
593 |
0 | 594 // A class to aid in the termination of a set of parallel tasks using |
595 // TaskQueueSet's for work stealing. | |
596 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
597 #undef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
598 |
0 | 599 class ParallelTaskTerminator: public StackObj { |
600 private: | |
601 int _n_threads; | |
602 TaskQueueSetSuper* _queue_set; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
603 int _offered_termination; |
0 | 604 |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
605 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
606 static uint _total_yields; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
607 static uint _total_spins; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
608 static uint _total_peeks; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
609 #endif |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
610 |
0 | 611 bool peek_in_queue_set(); |
612 protected: | |
613 virtual void yield(); | |
614 void sleep(uint millis); | |
615 | |
616 public: | |
617 | |
618 // "n_threads" is the number of threads to be terminated. "queue_set" is a | |
619 // queue sets of work queues of other threads. | |
620 ParallelTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set); | |
621 | |
622 // The current thread has no work, and is ready to terminate if everyone | |
623 // else is. If returns "true", all threads are terminated. If returns | |
624 // "false", available work has been observed in one of the task queues, | |
625 // so the global task is not complete. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
626 bool offer_termination() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
627 return offer_termination(NULL); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
628 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
629 |
907 | 630 // As above, but it also terminates if the should_exit_termination() |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
631 // method of the terminator parameter returns true. If terminator is |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
632 // NULL, then it is ignored. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
633 bool offer_termination(TerminatorTerminator* terminator); |
0 | 634 |
635 // Reset the terminator, so that it may be reused again. | |
636 // The caller is responsible for ensuring that this is done | |
637 // in an MT-safe manner, once the previous round of use of | |
638 // the terminator is finished. | |
639 void reset_for_reuse(); | |
1833
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
640 // Same as above but the number of parallel threads is set to the |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
641 // given number. |
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
642 void reset_for_reuse(int n_threads); |
0 | 643 |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
644 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
645 static uint total_yields() { return _total_yields; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
646 static uint total_spins() { return _total_spins; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
647 static uint total_peeks() { return _total_peeks; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
648 static void print_termination_counts(); |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
649 #endif |
0 | 650 }; |
651 | |
6197 | 652 template<class E, MEMFLAGS F, unsigned int N> inline bool |
653 GenericTaskQueue<E, F, N>::push(E t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
654 uint localBot = _bottom; |
11997 | 655 assert(localBot < N, "_bottom out of range."); |
907 | 656 idx_t top = _age.top(); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
657 uint dirty_n_elems = dirty_size(localBot, top); |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
658 assert(dirty_n_elems < N, "n_elems out of range."); |
0 | 659 if (dirty_n_elems < max_elems()) { |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
660 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
661 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
662 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
663 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
664 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
665 (void) const_cast<E&>(_elems[localBot] = t); |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
666 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
1665 | 667 TASKQUEUE_STATS_ONLY(stats.record_push()); |
0 | 668 return true; |
669 } else { | |
670 return push_slow(t, dirty_n_elems); | |
671 } | |
672 } | |
673 | |
6197 | 674 template<class E, MEMFLAGS F, unsigned int N> inline bool |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
675 GenericTaskQueue<E, F, N>::pop_local(volatile E& t) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
676 uint localBot = _bottom; |
907 | 677 // This value cannot be N-1. That can only occur as a result of |
0 | 678 // the assignment to bottom in this method. If it does, this method |
1638 | 679 // resets the size to 0 before the next call (which is sequential, |
0 | 680 // since this is pop_local.) |
907 | 681 uint dirty_n_elems = dirty_size(localBot, _age.top()); |
682 assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); | |
0 | 683 if (dirty_n_elems == 0) return false; |
684 localBot = decrement_index(localBot); | |
685 _bottom = localBot; | |
686 // This is necessary to prevent any read below from being reordered | |
687 // before the store just above. | |
688 OrderAccess::fence(); | |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
689 // g++ complains if the volatile result of the assignment is |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
690 // unused, so we cast the volatile away. We cannot cast directly |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
691 // to void, because gcc treats that as not using the result of the |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
692 // assignment. However, casting to E& means that we trigger an |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
693 // unused-value warning. So, we cast the E& to void. |
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
9073
diff
changeset
|
694 (void) const_cast<E&>(t = _elems[localBot]); |
0 | 695 // This is a second read of "age"; the "size()" above is the first. |
696 // If there's still at least one element in the queue, based on the | |
697 // "_bottom" and "age" we've read, then there can be no interference with | |
698 // a "pop_global" operation, and we're done. | |
907 | 699 idx_t tp = _age.top(); // XXX |
0 | 700 if (size(localBot, tp) > 0) { |
907 | 701 assert(dirty_size(localBot, tp) != N - 1, "sanity"); |
1665 | 702 TASKQUEUE_STATS_ONLY(stats.record_pop()); |
0 | 703 return true; |
704 } else { | |
705 // Otherwise, the queue contained exactly one element; we take the slow | |
706 // path. | |
907 | 707 return pop_local_slow(localBot, _age.get()); |
0 | 708 } |
709 } | |
710 | |
6197 | 711 typedef GenericTaskQueue<oop, mtGC> OopTaskQueue; |
712 typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet; | |
0 | 713 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
714 #ifdef _MSC_VER |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
715 #pragma warning(push) |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
716 // warning C4522: multiple assignment operators specified |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
717 #pragma warning(disable:4522) |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
718 #endif |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
719 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
720 // This is a container class for either an oop* or a narrowOop*. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
721 // Both are pushed onto a task queue and the consumer will test is_narrow() |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
722 // to determine which should be processed. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
723 class StarTask { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
724 void* _holder; // either union oop* or narrowOop* |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
725 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
726 enum { COMPRESSED_OOP_MASK = 1 }; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
727 |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
728 public: |
845
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
729 StarTask(narrowOop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
730 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
731 _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
732 } |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
733 StarTask(oop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
734 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
735 _holder = (void*)p; |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
736 } |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
737 StarTask() { _holder = NULL; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
738 operator oop*() { return (oop*)_holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
739 operator narrowOop*() { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
740 return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
741 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
742 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
743 StarTask& operator=(const StarTask& t) { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
744 _holder = t._holder; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
745 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
746 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
747 volatile StarTask& operator=(const volatile StarTask& t) volatile { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
748 _holder = t._holder; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
749 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
750 } |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
751 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
752 bool is_narrow() const { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
753 return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
754 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
755 }; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
756 |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
757 class ObjArrayTask |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
758 { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
759 public: |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
760 ObjArrayTask(oop o = NULL, int idx = 0): _obj(o), _index(idx) { } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
761 ObjArrayTask(oop o, size_t idx): _obj(o), _index(int(idx)) { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
762 assert(idx <= size_t(max_jint), "too big"); |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
763 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
764 ObjArrayTask(const ObjArrayTask& t): _obj(t._obj), _index(t._index) { } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
765 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
766 ObjArrayTask& operator =(const ObjArrayTask& t) { |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
767 _obj = t._obj; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
768 _index = t._index; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
769 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
770 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
771 volatile ObjArrayTask& |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
772 operator =(const volatile ObjArrayTask& t) volatile { |
12316
190899198332
7195622: CheckUnhandledOops has limited usefulness now
hseigel
parents:
12087
diff
changeset
|
773 (void)const_cast<oop&>(_obj = t._obj); |
1311
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
774 _index = t._index; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
775 return *this; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
776 } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
777 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
778 inline oop obj() const { return _obj; } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
779 inline int index() const { return _index; } |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
780 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
781 DEBUG_ONLY(bool is_valid() const); // Tasks to be pushed/popped must be valid. |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
782 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
783 private: |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
784 oop _obj; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
785 int _index; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
786 }; |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
787 |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
788 #ifdef _MSC_VER |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
789 #pragma warning(pop) |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
790 #endif |
2a1472c30599
4396719: Mark Sweep stack overflow on deeply nested Object arrays
jcoomes
parents:
1284
diff
changeset
|
791 |
6197 | 792 typedef OverflowTaskQueue<StarTask, mtClass> OopStarTaskQueue; |
793 typedef GenericTaskQueueSet<OopStarTaskQueue, mtClass> OopStarTaskQueueSet; | |
0 | 794 |
6197 | 795 typedef OverflowTaskQueue<size_t, mtInternal> RegionTaskQueue; |
796 typedef GenericTaskQueueSet<RegionTaskQueue, mtClass> RegionTaskQueueSet; | |
1833
8b10f48633dc
6984287: Regularize how GC parallel workers are specified.
jmasa
parents:
1709
diff
changeset
|
797 |
1972 | 798 |
799 #endif // SHARE_VM_UTILITIES_TASKQUEUE_HPP |