Mercurial > hg > truffle
annotate src/share/vm/utilities/taskqueue.hpp @ 1244:745c853ee57f
6885297: java -XX:RefDiscoveryPolicy=2 or -XX:TLABWasteTargetPercent=0 cause VM crash
Summary: Interval checking is now being performed on the values passed in for these two flags. The current acceptable range for RefDiscoveryPolicy is [0..1], and for TLABWasteTargetPercent it is [1..100].
Reviewed-by: apetrusenko, ysr
author | johnc |
---|---|
date | Fri, 29 Jan 2010 14:51:38 -0800 |
parents | 1ee412f7fec9 |
children | 5f1f51edaff6 |
rev | line source |
---|---|
0 | 1 /* |
579 | 2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 class TaskQueueSuper: public CHeapObj { | |
26 protected: | |
907 | 27 // Internal type for indexing the queue; also used for the tag. |
28 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; | |
29 | |
30 // The first free element after the last one pushed (mod N). | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
31 volatile uint _bottom; |
0 | 32 |
907 | 33 enum { |
34 N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K | |
35 MOD_N_MASK = N - 1 // To compute x mod N efficiently. | |
0 | 36 }; |
907 | 37 |
38 class Age { | |
39 public: | |
40 Age(size_t data = 0) { _data = data; } | |
41 Age(const Age& age) { _data = age._data; } | |
42 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } | |
0 | 43 |
907 | 44 Age get() const volatile { return _data; } |
45 void set(Age age) volatile { _data = age._data; } | |
46 | |
47 idx_t top() const volatile { return _fields._top; } | |
48 idx_t tag() const volatile { return _fields._tag; } | |
0 | 49 |
907 | 50 // Increment top; if it wraps, increment tag also. |
51 void increment() { | |
52 _fields._top = increment_index(_fields._top); | |
53 if (_fields._top == 0) ++_fields._tag; | |
54 } | |
0 | 55 |
907 | 56 Age cmpxchg(const Age new_age, const Age old_age) volatile { |
57 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, | |
58 (volatile intptr_t *)&_data, | |
59 (intptr_t)old_age._data); | |
60 } | |
61 | |
62 bool operator ==(const Age& other) const { return _data == other._data; } | |
0 | 63 |
907 | 64 private: |
65 struct fields { | |
66 idx_t _top; | |
67 idx_t _tag; | |
68 }; | |
69 union { | |
70 size_t _data; | |
71 fields _fields; | |
72 }; | |
0 | 73 }; |
907 | 74 |
75 volatile Age _age; | |
76 | |
77 // These both operate mod N. | |
78 static uint increment_index(uint ind) { | |
79 return (ind + 1) & MOD_N_MASK; | |
0 | 80 } |
907 | 81 static uint decrement_index(uint ind) { |
82 return (ind - 1) & MOD_N_MASK; | |
0 | 83 } |
84 | |
907 | 85 // Returns a number in the range [0..N). If the result is "N-1", it should be |
86 // interpreted as 0. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
87 uint dirty_size(uint bot, uint top) { |
907 | 88 return (bot - top) & MOD_N_MASK; |
0 | 89 } |
90 | |
91 // Returns the size corresponding to the given "bot" and "top". | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
92 uint size(uint bot, uint top) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
93 uint sz = dirty_size(bot, top); |
907 | 94 // Has the queue "wrapped", so that bottom is less than top? There's a |
95 // complicated special case here. A pair of threads could perform pop_local | |
96 // and pop_global operations concurrently, starting from a state in which | |
97 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, | |
98 // and the pop_global in incrementing _top (in which case the pop_global | |
99 // will be awarded the contested queue element.) The resulting state must | |
100 // be interpreted as an empty queue. (We only need to worry about one such | |
101 // event: only the queue owner performs pop_local's, and several concurrent | |
102 // threads attempting to perform the pop_global will all perform the same | |
103 // CAS, and only one can succeed.) Any stealing thread that reads after | |
104 // either the increment or decrement will see an empty queue, and will not | |
105 // join the competitors. The "sz == -1 || sz == N-1" state will not be | |
106 // modified by concurrent queues, so the owner thread can reset the state to | |
107 // _bottom == top so subsequent pushes will be performed normally. | |
108 return (sz == N - 1) ? 0 : sz; | |
0 | 109 } |
110 | |
111 public: | |
112 TaskQueueSuper() : _bottom(0), _age() {} | |
113 | |
114 // Return "true" if the TaskQueue contains any tasks. | |
115 bool peek(); | |
116 | |
117 // Return an estimate of the number of elements in the queue. | |
118 // The "careful" version admits the possibility of pop_local/pop_global | |
119 // races. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
120 uint size() { |
907 | 121 return size(_bottom, _age.top()); |
0 | 122 } |
123 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
124 uint dirty_size() { |
907 | 125 return dirty_size(_bottom, _age.top()); |
0 | 126 } |
127 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
128 void set_empty() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
129 _bottom = 0; |
907 | 130 _age.set(0); |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
131 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
132 |
0 | 133 // Maximum number of elements allowed in the queue. This is two less |
134 // than the actual queue size, for somewhat complicated reasons. | |
907 | 135 uint max_elems() { return N - 2; } |
0 | 136 }; |
137 | |
138 template<class E> class GenericTaskQueue: public TaskQueueSuper { | |
139 private: | |
140 // Slow paths for push, pop_local. (pop_global has no fast path.) | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
141 bool push_slow(E t, uint dirty_n_elems); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
142 bool pop_local_slow(uint localBot, Age oldAge); |
0 | 143 |
144 public: | |
145 // Initializes the queue to empty. | |
146 GenericTaskQueue(); | |
147 | |
148 void initialize(); | |
149 | |
150 // Push the task "t" on the queue. Returns "false" iff the queue is | |
151 // full. | |
152 inline bool push(E t); | |
153 | |
154 // If succeeds in claiming a task (from the 'local' end, that is, the | |
155 // most recently pushed task), returns "true" and sets "t" to that task. | |
156 // Otherwise, the queue is empty and returns false. | |
157 inline bool pop_local(E& t); | |
158 | |
159 // If succeeds in claiming a task (from the 'global' end, that is, the | |
160 // least recently pushed task), returns "true" and sets "t" to that task. | |
161 // Otherwise, the queue is empty and returns false. | |
162 bool pop_global(E& t); | |
163 | |
164 // Delete any resource associated with the queue. | |
165 ~GenericTaskQueue(); | |
166 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
167 // apply the closure to all elements in the task queue |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
168 void oops_do(OopClosure* f); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
169 |
0 | 170 private: |
171 // Element array. | |
172 volatile E* _elems; | |
173 }; | |
174 | |
175 template<class E> | |
176 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { | |
907 | 177 assert(sizeof(Age) == sizeof(size_t), "Depends on this."); |
0 | 178 } |
179 | |
180 template<class E> | |
181 void GenericTaskQueue<E>::initialize() { | |
907 | 182 _elems = NEW_C_HEAP_ARRAY(E, N); |
0 | 183 guarantee(_elems != NULL, "Allocation failed."); |
184 } | |
185 | |
186 template<class E> | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
187 void GenericTaskQueue<E>::oops_do(OopClosure* f) { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
188 // tty->print_cr("START OopTaskQueue::oops_do"); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
189 uint iters = size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
190 uint index = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
191 for (uint i = 0; i < iters; ++i) { |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
192 index = decrement_index(index); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
193 // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
194 // index, &_elems[index], _elems[index]); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
195 E* t = (E*)&_elems[index]; // cast away volatility |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
196 oop* p = (oop*)t; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
197 assert((*t)->is_oop_or_null(), "Not an oop or null"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
198 f->do_oop(p); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
199 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
200 // tty->print_cr("END OopTaskQueue::oops_do"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
201 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
202 |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
203 |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
204 template<class E> |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
205 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { |
907 | 206 if (dirty_n_elems == N - 1) { |
0 | 207 // Actually means 0, so do the push. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
208 uint localBot = _bottom; |
0 | 209 _elems[localBot] = t; |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
210 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
0 | 211 return true; |
907 | 212 } |
213 return false; | |
0 | 214 } |
215 | |
216 template<class E> | |
217 bool GenericTaskQueue<E>:: | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
218 pop_local_slow(uint localBot, Age oldAge) { |
0 | 219 // This queue was observed to contain exactly one element; either this |
220 // thread will claim it, or a competing "pop_global". In either case, | |
221 // the queue will be logically empty afterwards. Create a new Age value | |
222 // that represents the empty queue for the given value of "_bottom". (We | |
223 // must also increment "tag" because of the case where "bottom == 1", | |
224 // "top == 0". A pop_global could read the queue element in that case, | |
225 // then have the owner thread do a pop followed by another push. Without | |
226 // the incrementing of "tag", the pop_global's CAS could succeed, | |
227 // allowing it to believe it has claimed the stale element.) | |
907 | 228 Age newAge((idx_t)localBot, oldAge.tag() + 1); |
0 | 229 // Perhaps a competing pop_global has already incremented "top", in which |
230 // case it wins the element. | |
231 if (localBot == oldAge.top()) { | |
232 // No competing pop_global has yet incremented "top"; we'll try to | |
233 // install new_age, thus claiming the element. | |
907 | 234 Age tempAge = _age.cmpxchg(newAge, oldAge); |
0 | 235 if (tempAge == oldAge) { |
236 // We win. | |
907 | 237 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
0 | 238 return true; |
239 } | |
240 } | |
907 | 241 // We lose; a completing pop_global gets the element. But the queue is empty |
242 // and top is greater than bottom. Fix this representation of the empty queue | |
243 // to become the canonical one. | |
244 _age.set(newAge); | |
245 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); | |
0 | 246 return false; |
247 } | |
248 | |
249 template<class E> | |
250 bool GenericTaskQueue<E>::pop_global(E& t) { | |
907 | 251 Age oldAge = _age.get(); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
252 uint localBot = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
253 uint n_elems = size(localBot, oldAge.top()); |
0 | 254 if (n_elems == 0) { |
255 return false; | |
256 } | |
907 | 257 |
0 | 258 t = _elems[oldAge.top()]; |
907 | 259 Age newAge(oldAge); |
260 newAge.increment(); | |
261 Age resAge = _age.cmpxchg(newAge, oldAge); | |
262 | |
0 | 263 // Note that using "_bottom" here might fail, since a pop_local might |
264 // have decremented it. | |
907 | 265 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); |
266 return resAge == oldAge; | |
0 | 267 } |
268 | |
269 template<class E> | |
270 GenericTaskQueue<E>::~GenericTaskQueue() { | |
271 FREE_C_HEAP_ARRAY(E, _elems); | |
272 } | |
273 | |
274 // Inherits the typedef of "Task" from above. | |
275 class TaskQueueSetSuper: public CHeapObj { | |
276 protected: | |
277 static int randomParkAndMiller(int* seed0); | |
278 public: | |
279 // Returns "true" if some TaskQueue in the set contains a task. | |
280 virtual bool peek() = 0; | |
281 }; | |
282 | |
283 template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper { | |
284 private: | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
285 uint _n; |
0 | 286 GenericTaskQueue<E>** _queues; |
287 | |
288 public: | |
289 GenericTaskQueueSet(int n) : _n(n) { | |
290 typedef GenericTaskQueue<E>* GenericTaskQueuePtr; | |
291 _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n); | |
292 guarantee(_queues != NULL, "Allocation failure."); | |
293 for (int i = 0; i < n; i++) { | |
294 _queues[i] = NULL; | |
295 } | |
296 } | |
297 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
298 bool steal_1_random(uint queue_num, int* seed, E& t); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
299 bool steal_best_of_2(uint queue_num, int* seed, E& t); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
300 bool steal_best_of_all(uint queue_num, int* seed, E& t); |
0 | 301 |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
302 void register_queue(uint i, GenericTaskQueue<E>* q); |
0 | 303 |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
304 GenericTaskQueue<E>* queue(uint n); |
0 | 305 |
306 // The thread with queue number "queue_num" (and whose random number seed | |
307 // is at "seed") is trying to steal a task from some other queue. (It | |
308 // may try several queues, according to some configuration parameter.) | |
309 // If some steal succeeds, returns "true" and sets "t" the stolen task, | |
310 // otherwise returns false. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
311 bool steal(uint queue_num, int* seed, E& t); |
0 | 312 |
313 bool peek(); | |
314 }; | |
315 | |
316 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
317 void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
318 assert(i < _n, "index out of range."); |
0 | 319 _queues[i] = q; |
320 } | |
321 | |
322 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
323 GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) { |
0 | 324 return _queues[i]; |
325 } | |
326 | |
327 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
328 bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
329 for (uint i = 0; i < 2 * _n; i++) |
0 | 330 if (steal_best_of_2(queue_num, seed, t)) |
331 return true; | |
332 return false; | |
333 } | |
334 | |
335 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
336 bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) { |
0 | 337 if (_n > 2) { |
338 int best_k; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
339 uint best_sz = 0; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
340 for (uint k = 0; k < _n; k++) { |
0 | 341 if (k == queue_num) continue; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
342 uint sz = _queues[k]->size(); |
0 | 343 if (sz > best_sz) { |
344 best_sz = sz; | |
345 best_k = k; | |
346 } | |
347 } | |
348 return best_sz > 0 && _queues[best_k]->pop_global(t); | |
349 } else if (_n == 2) { | |
350 // Just try the other one. | |
351 int k = (queue_num + 1) % 2; | |
352 return _queues[k]->pop_global(t); | |
353 } else { | |
354 assert(_n == 1, "can't be zero."); | |
355 return false; | |
356 } | |
357 } | |
358 | |
359 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
360 bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) { |
0 | 361 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
362 uint k = queue_num; |
0 | 363 while (k == queue_num) k = randomParkAndMiller(seed) % _n; |
364 return _queues[2]->pop_global(t); | |
365 } else if (_n == 2) { | |
366 // Just try the other one. | |
367 int k = (queue_num + 1) % 2; | |
368 return _queues[k]->pop_global(t); | |
369 } else { | |
370 assert(_n == 1, "can't be zero."); | |
371 return false; | |
372 } | |
373 } | |
374 | |
375 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
376 bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) { |
0 | 377 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
378 uint k1 = queue_num; |
0 | 379 while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
380 uint k2 = queue_num; |
0 | 381 while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n; |
382 // Sample both and try the larger. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
383 uint sz1 = _queues[k1]->size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
384 uint sz2 = _queues[k2]->size(); |
0 | 385 if (sz2 > sz1) return _queues[k2]->pop_global(t); |
386 else return _queues[k1]->pop_global(t); | |
387 } else if (_n == 2) { | |
388 // Just try the other one. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
389 uint k = (queue_num + 1) % 2; |
0 | 390 return _queues[k]->pop_global(t); |
391 } else { | |
392 assert(_n == 1, "can't be zero."); | |
393 return false; | |
394 } | |
395 } | |
396 | |
397 template<class E> | |
398 bool GenericTaskQueueSet<E>::peek() { | |
399 // Try all the queues. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
400 for (uint j = 0; j < _n; j++) { |
0 | 401 if (_queues[j]->peek()) |
402 return true; | |
403 } | |
404 return false; | |
405 } | |
406 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
407 // When to terminate from the termination protocol. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
408 class TerminatorTerminator: public CHeapObj { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
409 public: |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
410 virtual bool should_exit_termination() = 0; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
411 }; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
412 |
0 | 413 // A class to aid in the termination of a set of parallel tasks using |
414 // TaskQueueSet's for work stealing. | |
415 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
416 #undef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
417 |
0 | 418 class ParallelTaskTerminator: public StackObj { |
419 private: | |
420 int _n_threads; | |
421 TaskQueueSetSuper* _queue_set; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
422 int _offered_termination; |
0 | 423 |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
424 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
425 static uint _total_yields; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
426 static uint _total_spins; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
427 static uint _total_peeks; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
428 #endif |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
429 |
0 | 430 bool peek_in_queue_set(); |
431 protected: | |
432 virtual void yield(); | |
433 void sleep(uint millis); | |
434 | |
435 public: | |
436 | |
437 // "n_threads" is the number of threads to be terminated. "queue_set" is a | |
438 // queue sets of work queues of other threads. | |
439 ParallelTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set); | |
440 | |
441 // The current thread has no work, and is ready to terminate if everyone | |
442 // else is. If returns "true", all threads are terminated. If returns | |
443 // "false", available work has been observed in one of the task queues, | |
444 // so the global task is not complete. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
445 bool offer_termination() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
446 return offer_termination(NULL); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
447 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
448 |
907 | 449 // As above, but it also terminates if the should_exit_termination() |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
450 // method of the terminator parameter returns true. If terminator is |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
451 // NULL, then it is ignored. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
452 bool offer_termination(TerminatorTerminator* terminator); |
0 | 453 |
454 // Reset the terminator, so that it may be reused again. | |
455 // The caller is responsible for ensuring that this is done | |
456 // in an MT-safe manner, once the previous round of use of | |
457 // the terminator is finished. | |
458 void reset_for_reuse(); | |
459 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
460 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
461 static uint total_yields() { return _total_yields; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
462 static uint total_spins() { return _total_spins; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
463 static uint total_peeks() { return _total_peeks; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
464 static void print_termination_counts(); |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
465 #endif |
0 | 466 }; |
467 | |
468 template<class E> inline bool GenericTaskQueue<E>::push(E t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
469 uint localBot = _bottom; |
907 | 470 assert((localBot >= 0) && (localBot < N), "_bottom out of range."); |
471 idx_t top = _age.top(); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
472 uint dirty_n_elems = dirty_size(localBot, top); |
907 | 473 assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range."); |
0 | 474 if (dirty_n_elems < max_elems()) { |
475 _elems[localBot] = t; | |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
476 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
0 | 477 return true; |
478 } else { | |
479 return push_slow(t, dirty_n_elems); | |
480 } | |
481 } | |
482 | |
483 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
484 uint localBot = _bottom; |
907 | 485 // This value cannot be N-1. That can only occur as a result of |
0 | 486 // the assignment to bottom in this method. If it does, this method |
487 // resets the size( to 0 before the next call (which is sequential, | |
488 // since this is pop_local.) | |
907 | 489 uint dirty_n_elems = dirty_size(localBot, _age.top()); |
490 assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); | |
0 | 491 if (dirty_n_elems == 0) return false; |
492 localBot = decrement_index(localBot); | |
493 _bottom = localBot; | |
494 // This is necessary to prevent any read below from being reordered | |
495 // before the store just above. | |
496 OrderAccess::fence(); | |
497 t = _elems[localBot]; | |
498 // This is a second read of "age"; the "size()" above is the first. | |
499 // If there's still at least one element in the queue, based on the | |
500 // "_bottom" and "age" we've read, then there can be no interference with | |
501 // a "pop_global" operation, and we're done. | |
907 | 502 idx_t tp = _age.top(); // XXX |
0 | 503 if (size(localBot, tp) > 0) { |
907 | 504 assert(dirty_size(localBot, tp) != N - 1, "sanity"); |
0 | 505 return true; |
506 } else { | |
507 // Otherwise, the queue contained exactly one element; we take the slow | |
508 // path. | |
907 | 509 return pop_local_slow(localBot, _age.get()); |
0 | 510 } |
511 } | |
512 | |
513 typedef oop Task; | |
514 typedef GenericTaskQueue<Task> OopTaskQueue; | |
515 typedef GenericTaskQueueSet<Task> OopTaskQueueSet; | |
516 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
517 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
518 #define COMPRESSED_OOP_MASK 1 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
519 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
520 // This is a container class for either an oop* or a narrowOop*. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
521 // Both are pushed onto a task queue and the consumer will test is_narrow() |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
522 // to determine which should be processed. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
523 class StarTask { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
524 void* _holder; // either union oop* or narrowOop* |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
525 public: |
845
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
526 StarTask(narrowOop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
527 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
528 _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
529 } |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
530 StarTask(oop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
531 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
532 _holder = (void*)p; |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
533 } |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
534 StarTask() { _holder = NULL; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
535 operator oop*() { return (oop*)_holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
536 operator narrowOop*() { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
537 return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
538 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
539 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
540 // Operators to preserve const/volatile in assignments required by gcc |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
541 void operator=(const volatile StarTask& t) volatile { _holder = t._holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
542 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
543 bool is_narrow() const { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
544 return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
545 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
546 }; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
547 |
0 | 548 typedef GenericTaskQueue<StarTask> OopStarTaskQueue; |
549 typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet; | |
550 | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
551 typedef size_t RegionTask; // index for region |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
552 typedef GenericTaskQueue<RegionTask> RegionTaskQueue; |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
553 typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet; |
0 | 554 |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
555 class RegionTaskQueueWithOverflow: public CHeapObj { |
0 | 556 protected: |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
557 RegionTaskQueue _region_queue; |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
558 GrowableArray<RegionTask>* _overflow_stack; |
0 | 559 |
560 public: | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
561 RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {} |
0 | 562 // Initialize both stealable queue and overflow |
563 void initialize(); | |
564 // Save first to stealable queue and then to overflow | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
565 void save(RegionTask t); |
0 | 566 // Retrieve first from overflow and then from stealable queue |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
567 bool retrieve(RegionTask& region_index); |
0 | 568 // Retrieve from stealable queue |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
569 bool retrieve_from_stealable_queue(RegionTask& region_index); |
0 | 570 // Retrieve from overflow |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
571 bool retrieve_from_overflow(RegionTask& region_index); |
0 | 572 bool is_empty(); |
573 bool stealable_is_empty(); | |
574 bool overflow_is_empty(); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
575 uint stealable_size() { return _region_queue.size(); } |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
576 RegionTaskQueue* task_queue() { return &_region_queue; } |
0 | 577 }; |
578 | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
579 #define USE_RegionTaskQueueWithOverflow |