Mercurial > hg > truffle
annotate src/share/vm/utilities/taskqueue.hpp @ 1284:5f1f51edaff6
6928081: G1: rename parameters common with CMS
Summary: Rename marking stack sizing flags to be common between G1 and CMS
Reviewed-by: ysr, tonyp
author | jmasa |
---|---|
date | Wed, 24 Feb 2010 07:00:33 -0800 |
parents | 1ee412f7fec9 |
children | 2a1472c30599 |
rev | line source |
---|---|
0 | 1 /* |
579 | 2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 class TaskQueueSuper: public CHeapObj { | |
26 protected: | |
907 | 27 // Internal type for indexing the queue; also used for the tag. |
28 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; | |
29 | |
30 // The first free element after the last one pushed (mod N). | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
31 volatile uint _bottom; |
0 | 32 |
907 | 33 enum { |
34 N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K | |
35 MOD_N_MASK = N - 1 // To compute x mod N efficiently. | |
0 | 36 }; |
907 | 37 |
38 class Age { | |
39 public: | |
40 Age(size_t data = 0) { _data = data; } | |
41 Age(const Age& age) { _data = age._data; } | |
42 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } | |
0 | 43 |
907 | 44 Age get() const volatile { return _data; } |
45 void set(Age age) volatile { _data = age._data; } | |
46 | |
47 idx_t top() const volatile { return _fields._top; } | |
48 idx_t tag() const volatile { return _fields._tag; } | |
0 | 49 |
907 | 50 // Increment top; if it wraps, increment tag also. |
51 void increment() { | |
52 _fields._top = increment_index(_fields._top); | |
53 if (_fields._top == 0) ++_fields._tag; | |
54 } | |
0 | 55 |
907 | 56 Age cmpxchg(const Age new_age, const Age old_age) volatile { |
57 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, | |
58 (volatile intptr_t *)&_data, | |
59 (intptr_t)old_age._data); | |
60 } | |
61 | |
62 bool operator ==(const Age& other) const { return _data == other._data; } | |
0 | 63 |
907 | 64 private: |
65 struct fields { | |
66 idx_t _top; | |
67 idx_t _tag; | |
68 }; | |
69 union { | |
70 size_t _data; | |
71 fields _fields; | |
72 }; | |
0 | 73 }; |
907 | 74 |
75 volatile Age _age; | |
76 | |
77 // These both operate mod N. | |
78 static uint increment_index(uint ind) { | |
79 return (ind + 1) & MOD_N_MASK; | |
0 | 80 } |
907 | 81 static uint decrement_index(uint ind) { |
82 return (ind - 1) & MOD_N_MASK; | |
0 | 83 } |
84 | |
907 | 85 // Returns a number in the range [0..N). If the result is "N-1", it should be |
86 // interpreted as 0. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
87 uint dirty_size(uint bot, uint top) { |
907 | 88 return (bot - top) & MOD_N_MASK; |
0 | 89 } |
90 | |
91 // Returns the size corresponding to the given "bot" and "top". | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
92 uint size(uint bot, uint top) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
93 uint sz = dirty_size(bot, top); |
907 | 94 // Has the queue "wrapped", so that bottom is less than top? There's a |
95 // complicated special case here. A pair of threads could perform pop_local | |
96 // and pop_global operations concurrently, starting from a state in which | |
97 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, | |
98 // and the pop_global in incrementing _top (in which case the pop_global | |
99 // will be awarded the contested queue element.) The resulting state must | |
100 // be interpreted as an empty queue. (We only need to worry about one such | |
101 // event: only the queue owner performs pop_local's, and several concurrent | |
102 // threads attempting to perform the pop_global will all perform the same | |
103 // CAS, and only one can succeed.) Any stealing thread that reads after | |
104 // either the increment or decrement will see an empty queue, and will not | |
105 // join the competitors. The "sz == -1 || sz == N-1" state will not be | |
106 // modified by concurrent queues, so the owner thread can reset the state to | |
107 // _bottom == top so subsequent pushes will be performed normally. | |
108 return (sz == N - 1) ? 0 : sz; | |
0 | 109 } |
110 | |
111 public: | |
112 TaskQueueSuper() : _bottom(0), _age() {} | |
113 | |
114 // Return "true" if the TaskQueue contains any tasks. | |
115 bool peek(); | |
116 | |
117 // Return an estimate of the number of elements in the queue. | |
118 // The "careful" version admits the possibility of pop_local/pop_global | |
119 // races. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
120 uint size() { |
907 | 121 return size(_bottom, _age.top()); |
0 | 122 } |
123 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
124 uint dirty_size() { |
907 | 125 return dirty_size(_bottom, _age.top()); |
0 | 126 } |
127 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
128 void set_empty() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
129 _bottom = 0; |
907 | 130 _age.set(0); |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
131 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
132 |
0 | 133 // Maximum number of elements allowed in the queue. This is two less |
134 // than the actual queue size, for somewhat complicated reasons. | |
907 | 135 uint max_elems() { return N - 2; } |
1284 | 136 |
137 // Total size of queue. | |
138 static const uint total_size() { return N; } | |
0 | 139 }; |
140 | |
141 template<class E> class GenericTaskQueue: public TaskQueueSuper { | |
142 private: | |
143 // Slow paths for push, pop_local. (pop_global has no fast path.) | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
144 bool push_slow(E t, uint dirty_n_elems); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
145 bool pop_local_slow(uint localBot, Age oldAge); |
0 | 146 |
147 public: | |
148 // Initializes the queue to empty. | |
149 GenericTaskQueue(); | |
150 | |
151 void initialize(); | |
152 | |
153 // Push the task "t" on the queue. Returns "false" iff the queue is | |
154 // full. | |
155 inline bool push(E t); | |
156 | |
157 // If succeeds in claiming a task (from the 'local' end, that is, the | |
158 // most recently pushed task), returns "true" and sets "t" to that task. | |
159 // Otherwise, the queue is empty and returns false. | |
160 inline bool pop_local(E& t); | |
161 | |
162 // If succeeds in claiming a task (from the 'global' end, that is, the | |
163 // least recently pushed task), returns "true" and sets "t" to that task. | |
164 // Otherwise, the queue is empty and returns false. | |
165 bool pop_global(E& t); | |
166 | |
167 // Delete any resource associated with the queue. | |
168 ~GenericTaskQueue(); | |
169 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
170 // apply the closure to all elements in the task queue |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
171 void oops_do(OopClosure* f); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
172 |
0 | 173 private: |
174 // Element array. | |
175 volatile E* _elems; | |
176 }; | |
177 | |
178 template<class E> | |
179 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { | |
907 | 180 assert(sizeof(Age) == sizeof(size_t), "Depends on this."); |
0 | 181 } |
182 | |
183 template<class E> | |
184 void GenericTaskQueue<E>::initialize() { | |
907 | 185 _elems = NEW_C_HEAP_ARRAY(E, N); |
0 | 186 guarantee(_elems != NULL, "Allocation failed."); |
187 } | |
188 | |
189 template<class E> | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
190 void GenericTaskQueue<E>::oops_do(OopClosure* f) { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
191 // tty->print_cr("START OopTaskQueue::oops_do"); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
192 uint iters = size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
193 uint index = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
194 for (uint i = 0; i < iters; ++i) { |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
195 index = decrement_index(index); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
196 // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
197 // index, &_elems[index], _elems[index]); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
198 E* t = (E*)&_elems[index]; // cast away volatility |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
199 oop* p = (oop*)t; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
200 assert((*t)->is_oop_or_null(), "Not an oop or null"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
201 f->do_oop(p); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
202 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
203 // tty->print_cr("END OopTaskQueue::oops_do"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
204 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
205 |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
206 |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
207 template<class E> |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
208 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { |
907 | 209 if (dirty_n_elems == N - 1) { |
0 | 210 // Actually means 0, so do the push. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
211 uint localBot = _bottom; |
0 | 212 _elems[localBot] = t; |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
213 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
0 | 214 return true; |
907 | 215 } |
216 return false; | |
0 | 217 } |
218 | |
219 template<class E> | |
220 bool GenericTaskQueue<E>:: | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
221 pop_local_slow(uint localBot, Age oldAge) { |
0 | 222 // This queue was observed to contain exactly one element; either this |
223 // thread will claim it, or a competing "pop_global". In either case, | |
224 // the queue will be logically empty afterwards. Create a new Age value | |
225 // that represents the empty queue for the given value of "_bottom". (We | |
226 // must also increment "tag" because of the case where "bottom == 1", | |
227 // "top == 0". A pop_global could read the queue element in that case, | |
228 // then have the owner thread do a pop followed by another push. Without | |
229 // the incrementing of "tag", the pop_global's CAS could succeed, | |
230 // allowing it to believe it has claimed the stale element.) | |
907 | 231 Age newAge((idx_t)localBot, oldAge.tag() + 1); |
0 | 232 // Perhaps a competing pop_global has already incremented "top", in which |
233 // case it wins the element. | |
234 if (localBot == oldAge.top()) { | |
235 // No competing pop_global has yet incremented "top"; we'll try to | |
236 // install new_age, thus claiming the element. | |
907 | 237 Age tempAge = _age.cmpxchg(newAge, oldAge); |
0 | 238 if (tempAge == oldAge) { |
239 // We win. | |
907 | 240 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
0 | 241 return true; |
242 } | |
243 } | |
907 | 244 // We lose; a completing pop_global gets the element. But the queue is empty |
245 // and top is greater than bottom. Fix this representation of the empty queue | |
246 // to become the canonical one. | |
247 _age.set(newAge); | |
248 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); | |
0 | 249 return false; |
250 } | |
251 | |
252 template<class E> | |
253 bool GenericTaskQueue<E>::pop_global(E& t) { | |
907 | 254 Age oldAge = _age.get(); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
255 uint localBot = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
256 uint n_elems = size(localBot, oldAge.top()); |
0 | 257 if (n_elems == 0) { |
258 return false; | |
259 } | |
907 | 260 |
0 | 261 t = _elems[oldAge.top()]; |
907 | 262 Age newAge(oldAge); |
263 newAge.increment(); | |
264 Age resAge = _age.cmpxchg(newAge, oldAge); | |
265 | |
0 | 266 // Note that using "_bottom" here might fail, since a pop_local might |
267 // have decremented it. | |
907 | 268 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); |
269 return resAge == oldAge; | |
0 | 270 } |
271 | |
272 template<class E> | |
273 GenericTaskQueue<E>::~GenericTaskQueue() { | |
274 FREE_C_HEAP_ARRAY(E, _elems); | |
275 } | |
276 | |
277 // Inherits the typedef of "Task" from above. | |
278 class TaskQueueSetSuper: public CHeapObj { | |
279 protected: | |
280 static int randomParkAndMiller(int* seed0); | |
281 public: | |
282 // Returns "true" if some TaskQueue in the set contains a task. | |
283 virtual bool peek() = 0; | |
284 }; | |
285 | |
286 template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper { | |
287 private: | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
288 uint _n; |
0 | 289 GenericTaskQueue<E>** _queues; |
290 | |
291 public: | |
292 GenericTaskQueueSet(int n) : _n(n) { | |
293 typedef GenericTaskQueue<E>* GenericTaskQueuePtr; | |
294 _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n); | |
295 guarantee(_queues != NULL, "Allocation failure."); | |
296 for (int i = 0; i < n; i++) { | |
297 _queues[i] = NULL; | |
298 } | |
299 } | |
300 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
301 bool steal_1_random(uint queue_num, int* seed, E& t); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
302 bool steal_best_of_2(uint queue_num, int* seed, E& t); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
303 bool steal_best_of_all(uint queue_num, int* seed, E& t); |
0 | 304 |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
305 void register_queue(uint i, GenericTaskQueue<E>* q); |
0 | 306 |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
307 GenericTaskQueue<E>* queue(uint n); |
0 | 308 |
309 // The thread with queue number "queue_num" (and whose random number seed | |
310 // is at "seed") is trying to steal a task from some other queue. (It | |
311 // may try several queues, according to some configuration parameter.) | |
312 // If some steal succeeds, returns "true" and sets "t" the stolen task, | |
313 // otherwise returns false. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
314 bool steal(uint queue_num, int* seed, E& t); |
0 | 315 |
316 bool peek(); | |
317 }; | |
318 | |
319 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
320 void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
321 assert(i < _n, "index out of range."); |
0 | 322 _queues[i] = q; |
323 } | |
324 | |
325 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
326 GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) { |
0 | 327 return _queues[i]; |
328 } | |
329 | |
330 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
331 bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
332 for (uint i = 0; i < 2 * _n; i++) |
0 | 333 if (steal_best_of_2(queue_num, seed, t)) |
334 return true; | |
335 return false; | |
336 } | |
337 | |
338 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
339 bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) { |
0 | 340 if (_n > 2) { |
341 int best_k; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
342 uint best_sz = 0; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
343 for (uint k = 0; k < _n; k++) { |
0 | 344 if (k == queue_num) continue; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
345 uint sz = _queues[k]->size(); |
0 | 346 if (sz > best_sz) { |
347 best_sz = sz; | |
348 best_k = k; | |
349 } | |
350 } | |
351 return best_sz > 0 && _queues[best_k]->pop_global(t); | |
352 } else if (_n == 2) { | |
353 // Just try the other one. | |
354 int k = (queue_num + 1) % 2; | |
355 return _queues[k]->pop_global(t); | |
356 } else { | |
357 assert(_n == 1, "can't be zero."); | |
358 return false; | |
359 } | |
360 } | |
361 | |
362 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
363 bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) { |
0 | 364 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
365 uint k = queue_num; |
0 | 366 while (k == queue_num) k = randomParkAndMiller(seed) % _n; |
367 return _queues[2]->pop_global(t); | |
368 } else if (_n == 2) { | |
369 // Just try the other one. | |
370 int k = (queue_num + 1) % 2; | |
371 return _queues[k]->pop_global(t); | |
372 } else { | |
373 assert(_n == 1, "can't be zero."); | |
374 return false; | |
375 } | |
376 } | |
377 | |
378 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
379 bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) { |
0 | 380 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
381 uint k1 = queue_num; |
0 | 382 while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
383 uint k2 = queue_num; |
0 | 384 while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n; |
385 // Sample both and try the larger. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
386 uint sz1 = _queues[k1]->size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
387 uint sz2 = _queues[k2]->size(); |
0 | 388 if (sz2 > sz1) return _queues[k2]->pop_global(t); |
389 else return _queues[k1]->pop_global(t); | |
390 } else if (_n == 2) { | |
391 // Just try the other one. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
392 uint k = (queue_num + 1) % 2; |
0 | 393 return _queues[k]->pop_global(t); |
394 } else { | |
395 assert(_n == 1, "can't be zero."); | |
396 return false; | |
397 } | |
398 } | |
399 | |
400 template<class E> | |
401 bool GenericTaskQueueSet<E>::peek() { | |
402 // Try all the queues. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
403 for (uint j = 0; j < _n; j++) { |
0 | 404 if (_queues[j]->peek()) |
405 return true; | |
406 } | |
407 return false; | |
408 } | |
409 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
410 // When to terminate from the termination protocol. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
411 class TerminatorTerminator: public CHeapObj { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
412 public: |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
413 virtual bool should_exit_termination() = 0; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
414 }; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
415 |
0 | 416 // A class to aid in the termination of a set of parallel tasks using |
417 // TaskQueueSet's for work stealing. | |
418 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
419 #undef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
420 |
0 | 421 class ParallelTaskTerminator: public StackObj { |
422 private: | |
423 int _n_threads; | |
424 TaskQueueSetSuper* _queue_set; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
425 int _offered_termination; |
0 | 426 |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
427 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
428 static uint _total_yields; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
429 static uint _total_spins; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
430 static uint _total_peeks; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
431 #endif |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
432 |
0 | 433 bool peek_in_queue_set(); |
434 protected: | |
435 virtual void yield(); | |
436 void sleep(uint millis); | |
437 | |
438 public: | |
439 | |
440 // "n_threads" is the number of threads to be terminated. "queue_set" is a | |
441 // queue sets of work queues of other threads. | |
442 ParallelTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set); | |
443 | |
444 // The current thread has no work, and is ready to terminate if everyone | |
445 // else is. If returns "true", all threads are terminated. If returns | |
446 // "false", available work has been observed in one of the task queues, | |
447 // so the global task is not complete. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
448 bool offer_termination() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
449 return offer_termination(NULL); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
450 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
451 |
907 | 452 // As above, but it also terminates if the should_exit_termination() |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
453 // method of the terminator parameter returns true. If terminator is |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
454 // NULL, then it is ignored. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
455 bool offer_termination(TerminatorTerminator* terminator); |
0 | 456 |
457 // Reset the terminator, so that it may be reused again. | |
458 // The caller is responsible for ensuring that this is done | |
459 // in an MT-safe manner, once the previous round of use of | |
460 // the terminator is finished. | |
461 void reset_for_reuse(); | |
462 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
463 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
464 static uint total_yields() { return _total_yields; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
465 static uint total_spins() { return _total_spins; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
466 static uint total_peeks() { return _total_peeks; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
467 static void print_termination_counts(); |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
468 #endif |
0 | 469 }; |
470 | |
471 template<class E> inline bool GenericTaskQueue<E>::push(E t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
472 uint localBot = _bottom; |
907 | 473 assert((localBot >= 0) && (localBot < N), "_bottom out of range."); |
474 idx_t top = _age.top(); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
475 uint dirty_n_elems = dirty_size(localBot, top); |
907 | 476 assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range."); |
0 | 477 if (dirty_n_elems < max_elems()) { |
478 _elems[localBot] = t; | |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
479 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
0 | 480 return true; |
481 } else { | |
482 return push_slow(t, dirty_n_elems); | |
483 } | |
484 } | |
485 | |
486 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
487 uint localBot = _bottom; |
907 | 488 // This value cannot be N-1. That can only occur as a result of |
0 | 489 // the assignment to bottom in this method. If it does, this method |
490 // resets the size( to 0 before the next call (which is sequential, | |
491 // since this is pop_local.) | |
907 | 492 uint dirty_n_elems = dirty_size(localBot, _age.top()); |
493 assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); | |
0 | 494 if (dirty_n_elems == 0) return false; |
495 localBot = decrement_index(localBot); | |
496 _bottom = localBot; | |
497 // This is necessary to prevent any read below from being reordered | |
498 // before the store just above. | |
499 OrderAccess::fence(); | |
500 t = _elems[localBot]; | |
501 // This is a second read of "age"; the "size()" above is the first. | |
502 // If there's still at least one element in the queue, based on the | |
503 // "_bottom" and "age" we've read, then there can be no interference with | |
504 // a "pop_global" operation, and we're done. | |
907 | 505 idx_t tp = _age.top(); // XXX |
0 | 506 if (size(localBot, tp) > 0) { |
907 | 507 assert(dirty_size(localBot, tp) != N - 1, "sanity"); |
0 | 508 return true; |
509 } else { | |
510 // Otherwise, the queue contained exactly one element; we take the slow | |
511 // path. | |
907 | 512 return pop_local_slow(localBot, _age.get()); |
0 | 513 } |
514 } | |
515 | |
516 typedef oop Task; | |
517 typedef GenericTaskQueue<Task> OopTaskQueue; | |
518 typedef GenericTaskQueueSet<Task> OopTaskQueueSet; | |
519 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
520 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
521 #define COMPRESSED_OOP_MASK 1 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
522 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
523 // This is a container class for either an oop* or a narrowOop*. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
524 // Both are pushed onto a task queue and the consumer will test is_narrow() |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
525 // to determine which should be processed. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
526 class StarTask { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
527 void* _holder; // either union oop* or narrowOop* |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
528 public: |
845
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
529 StarTask(narrowOop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
530 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
531 _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
532 } |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
533 StarTask(oop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
534 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
535 _holder = (void*)p; |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
536 } |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
537 StarTask() { _holder = NULL; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
538 operator oop*() { return (oop*)_holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
539 operator narrowOop*() { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
540 return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
541 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
542 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
543 // Operators to preserve const/volatile in assignments required by gcc |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
544 void operator=(const volatile StarTask& t) volatile { _holder = t._holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
545 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
546 bool is_narrow() const { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
547 return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
548 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
549 }; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
550 |
0 | 551 typedef GenericTaskQueue<StarTask> OopStarTaskQueue; |
552 typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet; | |
553 | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
554 typedef size_t RegionTask; // index for region |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
555 typedef GenericTaskQueue<RegionTask> RegionTaskQueue; |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
556 typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet; |
0 | 557 |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
558 class RegionTaskQueueWithOverflow: public CHeapObj { |
0 | 559 protected: |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
560 RegionTaskQueue _region_queue; |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
561 GrowableArray<RegionTask>* _overflow_stack; |
0 | 562 |
563 public: | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
564 RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {} |
0 | 565 // Initialize both stealable queue and overflow |
566 void initialize(); | |
567 // Save first to stealable queue and then to overflow | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
568 void save(RegionTask t); |
0 | 569 // Retrieve first from overflow and then from stealable queue |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
570 bool retrieve(RegionTask& region_index); |
0 | 571 // Retrieve from stealable queue |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
572 bool retrieve_from_stealable_queue(RegionTask& region_index); |
0 | 573 // Retrieve from overflow |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
574 bool retrieve_from_overflow(RegionTask& region_index); |
0 | 575 bool is_empty(); |
576 bool stealable_is_empty(); | |
577 bool overflow_is_empty(); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
578 uint stealable_size() { return _region_queue.size(); } |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
579 RegionTaskQueue* task_queue() { return &_region_queue; } |
0 | 580 }; |
581 | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
582 #define USE_RegionTaskQueueWithOverflow |