Mercurial > hg > graal-compiler
annotate src/share/vm/utilities/taskqueue.hpp @ 1091:6aa7255741f3
6906727: UseCompressedOops: some card-marking fixes related to object arrays
Summary: Introduced a new write_ref_array(HeapWords* start, size_t count) method that does the requisite MemRegion range calculation so (some of the) clients of the erstwhile write_ref_array(MemRegion mr) do not need to worry. This removed all external uses of array_size(), which was also simplified and made private. Asserts were added to catch other possible issues. Further, less essential, fixes stemming from this investigation are deferred to CR 6904516 (to follow shortly in hs17).
Reviewed-by: kvn, coleenp, jmasa
author | ysr |
---|---|
date | Thu, 03 Dec 2009 15:01:57 -0800 |
parents | 1ee412f7fec9 |
children | 5f1f51edaff6 |
rev | line source |
---|---|
0 | 1 /* |
579 | 2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 class TaskQueueSuper: public CHeapObj { | |
26 protected: | |
907 | 27 // Internal type for indexing the queue; also used for the tag. |
28 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; | |
29 | |
30 // The first free element after the last one pushed (mod N). | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
31 volatile uint _bottom; |
0 | 32 |
907 | 33 enum { |
34 N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K | |
35 MOD_N_MASK = N - 1 // To compute x mod N efficiently. | |
0 | 36 }; |
907 | 37 |
38 class Age { | |
39 public: | |
40 Age(size_t data = 0) { _data = data; } | |
41 Age(const Age& age) { _data = age._data; } | |
42 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } | |
0 | 43 |
907 | 44 Age get() const volatile { return _data; } |
45 void set(Age age) volatile { _data = age._data; } | |
46 | |
47 idx_t top() const volatile { return _fields._top; } | |
48 idx_t tag() const volatile { return _fields._tag; } | |
0 | 49 |
907 | 50 // Increment top; if it wraps, increment tag also. |
51 void increment() { | |
52 _fields._top = increment_index(_fields._top); | |
53 if (_fields._top == 0) ++_fields._tag; | |
54 } | |
0 | 55 |
907 | 56 Age cmpxchg(const Age new_age, const Age old_age) volatile { |
57 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, | |
58 (volatile intptr_t *)&_data, | |
59 (intptr_t)old_age._data); | |
60 } | |
61 | |
62 bool operator ==(const Age& other) const { return _data == other._data; } | |
0 | 63 |
907 | 64 private: |
65 struct fields { | |
66 idx_t _top; | |
67 idx_t _tag; | |
68 }; | |
69 union { | |
70 size_t _data; | |
71 fields _fields; | |
72 }; | |
0 | 73 }; |
907 | 74 |
75 volatile Age _age; | |
76 | |
77 // These both operate mod N. | |
78 static uint increment_index(uint ind) { | |
79 return (ind + 1) & MOD_N_MASK; | |
0 | 80 } |
907 | 81 static uint decrement_index(uint ind) { |
82 return (ind - 1) & MOD_N_MASK; | |
0 | 83 } |
84 | |
907 | 85 // Returns a number in the range [0..N). If the result is "N-1", it should be |
86 // interpreted as 0. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
87 uint dirty_size(uint bot, uint top) { |
907 | 88 return (bot - top) & MOD_N_MASK; |
0 | 89 } |
90 | |
91 // Returns the size corresponding to the given "bot" and "top". | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
92 uint size(uint bot, uint top) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
93 uint sz = dirty_size(bot, top); |
907 | 94 // Has the queue "wrapped", so that bottom is less than top? There's a |
95 // complicated special case here. A pair of threads could perform pop_local | |
96 // and pop_global operations concurrently, starting from a state in which | |
97 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, | |
98 // and the pop_global in incrementing _top (in which case the pop_global | |
99 // will be awarded the contested queue element.) The resulting state must | |
100 // be interpreted as an empty queue. (We only need to worry about one such | |
101 // event: only the queue owner performs pop_local's, and several concurrent | |
102 // threads attempting to perform the pop_global will all perform the same | |
103 // CAS, and only one can succeed.) Any stealing thread that reads after | |
104 // either the increment or decrement will see an empty queue, and will not | |
105 // join the competitors. The "sz == -1 || sz == N-1" state will not be | |
106 // modified by concurrent queues, so the owner thread can reset the state to | |
107 // _bottom == top so subsequent pushes will be performed normally. | |
108 return (sz == N - 1) ? 0 : sz; | |
0 | 109 } |
110 | |
111 public: | |
112 TaskQueueSuper() : _bottom(0), _age() {} | |
113 | |
114 // Return "true" if the TaskQueue contains any tasks. | |
115 bool peek(); | |
116 | |
117 // Return an estimate of the number of elements in the queue. | |
118 // The "careful" version admits the possibility of pop_local/pop_global | |
119 // races. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
120 uint size() { |
907 | 121 return size(_bottom, _age.top()); |
0 | 122 } |
123 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
124 uint dirty_size() { |
907 | 125 return dirty_size(_bottom, _age.top()); |
0 | 126 } |
127 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
128 void set_empty() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
129 _bottom = 0; |
907 | 130 _age.set(0); |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
131 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
132 |
0 | 133 // Maximum number of elements allowed in the queue. This is two less |
134 // than the actual queue size, for somewhat complicated reasons. | |
907 | 135 uint max_elems() { return N - 2; } |
0 | 136 }; |
137 | |
138 template<class E> class GenericTaskQueue: public TaskQueueSuper { | |
139 private: | |
140 // Slow paths for push, pop_local. (pop_global has no fast path.) | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
141 bool push_slow(E t, uint dirty_n_elems); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
142 bool pop_local_slow(uint localBot, Age oldAge); |
0 | 143 |
144 public: | |
145 // Initializes the queue to empty. | |
146 GenericTaskQueue(); | |
147 | |
148 void initialize(); | |
149 | |
150 // Push the task "t" on the queue. Returns "false" iff the queue is | |
151 // full. | |
152 inline bool push(E t); | |
153 | |
154 // If succeeds in claiming a task (from the 'local' end, that is, the | |
155 // most recently pushed task), returns "true" and sets "t" to that task. | |
156 // Otherwise, the queue is empty and returns false. | |
157 inline bool pop_local(E& t); | |
158 | |
159 // If succeeds in claiming a task (from the 'global' end, that is, the | |
160 // least recently pushed task), returns "true" and sets "t" to that task. | |
161 // Otherwise, the queue is empty and returns false. | |
162 bool pop_global(E& t); | |
163 | |
164 // Delete any resource associated with the queue. | |
165 ~GenericTaskQueue(); | |
166 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
167 // apply the closure to all elements in the task queue |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
168 void oops_do(OopClosure* f); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
169 |
0 | 170 private: |
171 // Element array. | |
172 volatile E* _elems; | |
173 }; | |
174 | |
175 template<class E> | |
176 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { | |
907 | 177 assert(sizeof(Age) == sizeof(size_t), "Depends on this."); |
0 | 178 } |
179 | |
180 template<class E> | |
181 void GenericTaskQueue<E>::initialize() { | |
907 | 182 _elems = NEW_C_HEAP_ARRAY(E, N); |
0 | 183 guarantee(_elems != NULL, "Allocation failed."); |
184 } | |
185 | |
186 template<class E> | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
187 void GenericTaskQueue<E>::oops_do(OopClosure* f) { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
188 // tty->print_cr("START OopTaskQueue::oops_do"); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
189 uint iters = size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
190 uint index = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
191 for (uint i = 0; i < iters; ++i) { |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
192 index = decrement_index(index); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
193 // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
194 // index, &_elems[index], _elems[index]); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
195 E* t = (E*)&_elems[index]; // cast away volatility |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
196 oop* p = (oop*)t; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
197 assert((*t)->is_oop_or_null(), "Not an oop or null"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
198 f->do_oop(p); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
199 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
200 // tty->print_cr("END OopTaskQueue::oops_do"); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
201 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
202 |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
203 |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
204 template<class E> |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
205 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { |
907 | 206 if (dirty_n_elems == N - 1) { |
0 | 207 // Actually means 0, so do the push. |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
208 uint localBot = _bottom; |
0 | 209 _elems[localBot] = t; |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
210 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
0 | 211 return true; |
907 | 212 } |
213 return false; | |
0 | 214 } |
215 | |
216 template<class E> | |
217 bool GenericTaskQueue<E>:: | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
218 pop_local_slow(uint localBot, Age oldAge) { |
0 | 219 // This queue was observed to contain exactly one element; either this |
220 // thread will claim it, or a competing "pop_global". In either case, | |
221 // the queue will be logically empty afterwards. Create a new Age value | |
222 // that represents the empty queue for the given value of "_bottom". (We | |
223 // must also increment "tag" because of the case where "bottom == 1", | |
224 // "top == 0". A pop_global could read the queue element in that case, | |
225 // then have the owner thread do a pop followed by another push. Without | |
226 // the incrementing of "tag", the pop_global's CAS could succeed, | |
227 // allowing it to believe it has claimed the stale element.) | |
907 | 228 Age newAge((idx_t)localBot, oldAge.tag() + 1); |
0 | 229 // Perhaps a competing pop_global has already incremented "top", in which |
230 // case it wins the element. | |
231 if (localBot == oldAge.top()) { | |
232 // No competing pop_global has yet incremented "top"; we'll try to | |
233 // install new_age, thus claiming the element. | |
907 | 234 Age tempAge = _age.cmpxchg(newAge, oldAge); |
0 | 235 if (tempAge == oldAge) { |
236 // We win. | |
907 | 237 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
0 | 238 return true; |
239 } | |
240 } | |
907 | 241 // We lose; a completing pop_global gets the element. But the queue is empty |
242 // and top is greater than bottom. Fix this representation of the empty queue | |
243 // to become the canonical one. | |
244 _age.set(newAge); | |
245 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); | |
0 | 246 return false; |
247 } | |
248 | |
249 template<class E> | |
250 bool GenericTaskQueue<E>::pop_global(E& t) { | |
907 | 251 Age oldAge = _age.get(); |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
252 uint localBot = _bottom; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
253 uint n_elems = size(localBot, oldAge.top()); |
0 | 254 if (n_elems == 0) { |
255 return false; | |
256 } | |
907 | 257 |
0 | 258 t = _elems[oldAge.top()]; |
907 | 259 Age newAge(oldAge); |
260 newAge.increment(); | |
261 Age resAge = _age.cmpxchg(newAge, oldAge); | |
262 | |
0 | 263 // Note that using "_bottom" here might fail, since a pop_local might |
264 // have decremented it. | |
907 | 265 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); |
266 return resAge == oldAge; | |
0 | 267 } |
268 | |
269 template<class E> | |
270 GenericTaskQueue<E>::~GenericTaskQueue() { | |
271 FREE_C_HEAP_ARRAY(E, _elems); | |
272 } | |
273 | |
274 // Inherits the typedef of "Task" from above. | |
275 class TaskQueueSetSuper: public CHeapObj { | |
276 protected: | |
277 static int randomParkAndMiller(int* seed0); | |
278 public: | |
279 // Returns "true" if some TaskQueue in the set contains a task. | |
280 virtual bool peek() = 0; | |
281 }; | |
282 | |
283 template<class E> class GenericTaskQueueSet: public TaskQueueSetSuper { | |
284 private: | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
285 uint _n; |
0 | 286 GenericTaskQueue<E>** _queues; |
287 | |
288 public: | |
289 GenericTaskQueueSet(int n) : _n(n) { | |
290 typedef GenericTaskQueue<E>* GenericTaskQueuePtr; | |
291 _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n); | |
292 guarantee(_queues != NULL, "Allocation failure."); | |
293 for (int i = 0; i < n; i++) { | |
294 _queues[i] = NULL; | |
295 } | |
296 } | |
297 | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
298 bool steal_1_random(uint queue_num, int* seed, E& t); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
299 bool steal_best_of_2(uint queue_num, int* seed, E& t); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
300 bool steal_best_of_all(uint queue_num, int* seed, E& t); |
0 | 301 |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
302 void register_queue(uint i, GenericTaskQueue<E>* q); |
0 | 303 |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
304 GenericTaskQueue<E>* queue(uint n); |
0 | 305 |
306 // The thread with queue number "queue_num" (and whose random number seed | |
307 // is at "seed") is trying to steal a task from some other queue. (It | |
308 // may try several queues, according to some configuration parameter.) | |
309 // If some steal succeeds, returns "true" and sets "t" the stolen task, | |
310 // otherwise returns false. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
311 bool steal(uint queue_num, int* seed, E& t); |
0 | 312 |
313 bool peek(); | |
314 }; | |
315 | |
316 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
317 void GenericTaskQueueSet<E>::register_queue(uint i, GenericTaskQueue<E>* q) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
318 assert(i < _n, "index out of range."); |
0 | 319 _queues[i] = q; |
320 } | |
321 | |
322 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
323 GenericTaskQueue<E>* GenericTaskQueueSet<E>::queue(uint i) { |
0 | 324 return _queues[i]; |
325 } | |
326 | |
327 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
328 bool GenericTaskQueueSet<E>::steal(uint queue_num, int* seed, E& t) { |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
329 for (uint i = 0; i < 2 * _n; i++) |
0 | 330 if (steal_best_of_2(queue_num, seed, t)) |
331 return true; | |
332 return false; | |
333 } | |
334 | |
335 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
336 bool GenericTaskQueueSet<E>::steal_best_of_all(uint queue_num, int* seed, E& t) { |
0 | 337 if (_n > 2) { |
338 int best_k; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
339 uint best_sz = 0; |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
340 for (uint k = 0; k < _n; k++) { |
0 | 341 if (k == queue_num) continue; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
342 uint sz = _queues[k]->size(); |
0 | 343 if (sz > best_sz) { |
344 best_sz = sz; | |
345 best_k = k; | |
346 } | |
347 } | |
348 return best_sz > 0 && _queues[best_k]->pop_global(t); | |
349 } else if (_n == 2) { | |
350 // Just try the other one. | |
351 int k = (queue_num + 1) % 2; | |
352 return _queues[k]->pop_global(t); | |
353 } else { | |
354 assert(_n == 1, "can't be zero."); | |
355 return false; | |
356 } | |
357 } | |
358 | |
359 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
360 bool GenericTaskQueueSet<E>::steal_1_random(uint queue_num, int* seed, E& t) { |
0 | 361 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
362 uint k = queue_num; |
0 | 363 while (k == queue_num) k = randomParkAndMiller(seed) % _n; |
364 return _queues[2]->pop_global(t); | |
365 } else if (_n == 2) { | |
366 // Just try the other one. | |
367 int k = (queue_num + 1) % 2; | |
368 return _queues[k]->pop_global(t); | |
369 } else { | |
370 assert(_n == 1, "can't be zero."); | |
371 return false; | |
372 } | |
373 } | |
374 | |
375 template<class E> | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
376 bool GenericTaskQueueSet<E>::steal_best_of_2(uint queue_num, int* seed, E& t) { |
0 | 377 if (_n > 2) { |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
378 uint k1 = queue_num; |
0 | 379 while (k1 == queue_num) k1 = randomParkAndMiller(seed) % _n; |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
380 uint k2 = queue_num; |
0 | 381 while (k2 == queue_num || k2 == k1) k2 = randomParkAndMiller(seed) % _n; |
382 // Sample both and try the larger. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
383 uint sz1 = _queues[k1]->size(); |
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
384 uint sz2 = _queues[k2]->size(); |
0 | 385 if (sz2 > sz1) return _queues[k2]->pop_global(t); |
386 else return _queues[k1]->pop_global(t); | |
387 } else if (_n == 2) { | |
388 // Just try the other one. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
389 uint k = (queue_num + 1) % 2; |
0 | 390 return _queues[k]->pop_global(t); |
391 } else { | |
392 assert(_n == 1, "can't be zero."); | |
393 return false; | |
394 } | |
395 } | |
396 | |
397 template<class E> | |
398 bool GenericTaskQueueSet<E>::peek() { | |
399 // Try all the queues. | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
400 for (uint j = 0; j < _n; j++) { |
0 | 401 if (_queues[j]->peek()) |
402 return true; | |
403 } | |
404 return false; | |
405 } | |
406 | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
407 // When to terminate from the termination protocol. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
408 class TerminatorTerminator: public CHeapObj { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
409 public: |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
410 virtual bool should_exit_termination() = 0; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
411 }; |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
412 |
0 | 413 // A class to aid in the termination of a set of parallel tasks using |
414 // TaskQueueSet's for work stealing. | |
415 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
416 #undef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
417 |
0 | 418 class ParallelTaskTerminator: public StackObj { |
419 private: | |
420 int _n_threads; | |
421 TaskQueueSetSuper* _queue_set; | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
422 int _offered_termination; |
0 | 423 |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
424 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
425 static uint _total_yields; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
426 static uint _total_spins; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
427 static uint _total_peeks; |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
428 #endif |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
429 |
0 | 430 bool peek_in_queue_set(); |
431 protected: | |
432 virtual void yield(); | |
433 void sleep(uint millis); | |
434 | |
435 public: | |
436 | |
437 // "n_threads" is the number of threads to be terminated. "queue_set" is a | |
438 // queue sets of work queues of other threads. | |
439 ParallelTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set); | |
440 | |
441 // The current thread has no work, and is ready to terminate if everyone | |
442 // else is. If returns "true", all threads are terminated. If returns | |
443 // "false", available work has been observed in one of the task queues, | |
444 // so the global task is not complete. | |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
445 bool offer_termination() { |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
446 return offer_termination(NULL); |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
447 } |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
448 |
907 | 449 // As above, but it also terminates if the should_exit_termination() |
342
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
450 // method of the terminator parameter returns true. If terminator is |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
451 // NULL, then it is ignored. |
37f87013dfd8
6711316: Open source the Garbage-First garbage collector
ysr
parents:
113
diff
changeset
|
452 bool offer_termination(TerminatorTerminator* terminator); |
0 | 453 |
454 // Reset the terminator, so that it may be reused again. | |
455 // The caller is responsible for ensuring that this is done | |
456 // in an MT-safe manner, once the previous round of use of | |
457 // the terminator is finished. | |
458 void reset_for_reuse(); | |
459 | |
546
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
460 #ifdef TRACESPINNING |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
461 static uint total_yields() { return _total_yields; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
462 static uint total_spins() { return _total_spins; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
463 static uint total_peeks() { return _total_peeks; } |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
464 static void print_termination_counts(); |
05c6d52fa7a9
6690928: Use spinning in combination with yields for workstealing termination.
jmasa
parents:
541
diff
changeset
|
465 #endif |
0 | 466 }; |
467 | |
468 template<class E> inline bool GenericTaskQueue<E>::push(E t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
469 uint localBot = _bottom; |
907 | 470 assert((localBot >= 0) && (localBot < N), "_bottom out of range."); |
471 idx_t top = _age.top(); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
472 uint dirty_n_elems = dirty_size(localBot, top); |
907 | 473 assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range."); |
0 | 474 if (dirty_n_elems < max_elems()) { |
475 _elems[localBot] = t; | |
1024
2c03ce058f55
6888847: TaskQueue needs release_store() for correctness on RMO machines
bobv
parents:
907
diff
changeset
|
476 OrderAccess::release_store(&_bottom, increment_index(localBot)); |
0 | 477 return true; |
478 } else { | |
479 return push_slow(t, dirty_n_elems); | |
480 } | |
481 } | |
482 | |
483 template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) { | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
484 uint localBot = _bottom; |
907 | 485 // This value cannot be N-1. That can only occur as a result of |
0 | 486 // the assignment to bottom in this method. If it does, this method |
487 // resets the size( to 0 before the next call (which is sequential, | |
488 // since this is pop_local.) | |
907 | 489 uint dirty_n_elems = dirty_size(localBot, _age.top()); |
490 assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); | |
0 | 491 if (dirty_n_elems == 0) return false; |
492 localBot = decrement_index(localBot); | |
493 _bottom = localBot; | |
494 // This is necessary to prevent any read below from being reordered | |
495 // before the store just above. | |
496 OrderAccess::fence(); | |
497 t = _elems[localBot]; | |
498 // This is a second read of "age"; the "size()" above is the first. | |
499 // If there's still at least one element in the queue, based on the | |
500 // "_bottom" and "age" we've read, then there can be no interference with | |
501 // a "pop_global" operation, and we're done. | |
907 | 502 idx_t tp = _age.top(); // XXX |
0 | 503 if (size(localBot, tp) > 0) { |
907 | 504 assert(dirty_size(localBot, tp) != N - 1, "sanity"); |
0 | 505 return true; |
506 } else { | |
507 // Otherwise, the queue contained exactly one element; we take the slow | |
508 // path. | |
907 | 509 return pop_local_slow(localBot, _age.get()); |
0 | 510 } |
511 } | |
512 | |
513 typedef oop Task; | |
514 typedef GenericTaskQueue<Task> OopTaskQueue; | |
515 typedef GenericTaskQueueSet<Task> OopTaskQueueSet; | |
516 | |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
517 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
518 #define COMPRESSED_OOP_MASK 1 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
519 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
520 // This is a container class for either an oop* or a narrowOop*. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
521 // Both are pushed onto a task queue and the consumer will test is_narrow() |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
522 // to determine which should be processed. |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
523 class StarTask { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
524 void* _holder; // either union oop* or narrowOop* |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
525 public: |
845
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
526 StarTask(narrowOop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
527 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
528 _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
529 } |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
530 StarTask(oop* p) { |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
531 assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!"); |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
532 _holder = (void*)p; |
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
579
diff
changeset
|
533 } |
113
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
534 StarTask() { _holder = NULL; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
535 operator oop*() { return (oop*)_holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
536 operator narrowOop*() { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
537 return (narrowOop*)((uintptr_t)_holder & ~COMPRESSED_OOP_MASK); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
538 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
539 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
540 // Operators to preserve const/volatile in assignments required by gcc |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
541 void operator=(const volatile StarTask& t) volatile { _holder = t._holder; } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
542 |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
543 bool is_narrow() const { |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
544 return (((uintptr_t)_holder & COMPRESSED_OOP_MASK) != 0); |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
545 } |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
546 }; |
ba764ed4b6f2
6420645: Create a vm that uses compressed oops for up to 32gb heapsizes
coleenp
parents:
0
diff
changeset
|
547 |
0 | 548 typedef GenericTaskQueue<StarTask> OopStarTaskQueue; |
549 typedef GenericTaskQueueSet<StarTask> OopStarTaskQueueSet; | |
550 | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
551 typedef size_t RegionTask; // index for region |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
552 typedef GenericTaskQueue<RegionTask> RegionTaskQueue; |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
553 typedef GenericTaskQueueSet<RegionTask> RegionTaskQueueSet; |
0 | 554 |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
555 class RegionTaskQueueWithOverflow: public CHeapObj { |
0 | 556 protected: |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
557 RegionTaskQueue _region_queue; |
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
558 GrowableArray<RegionTask>* _overflow_stack; |
0 | 559 |
560 public: | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
561 RegionTaskQueueWithOverflow() : _overflow_stack(NULL) {} |
0 | 562 // Initialize both stealable queue and overflow |
563 void initialize(); | |
564 // Save first to stealable queue and then to overflow | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
565 void save(RegionTask t); |
0 | 566 // Retrieve first from overflow and then from stealable queue |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
567 bool retrieve(RegionTask& region_index); |
0 | 568 // Retrieve from stealable queue |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
569 bool retrieve_from_stealable_queue(RegionTask& region_index); |
0 | 570 // Retrieve from overflow |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
571 bool retrieve_from_overflow(RegionTask& region_index); |
0 | 572 bool is_empty(); |
573 bool stealable_is_empty(); | |
574 bool overflow_is_empty(); | |
541
23673011938d
6787254: Work queue capacity can be increased substantially on some platforms
ysr
parents:
375
diff
changeset
|
575 uint stealable_size() { return _region_queue.size(); } |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
576 RegionTaskQueue* task_queue() { return &_region_queue; } |
0 | 577 }; |
578 | |
375
81cd571500b0
6725697: par compact - rename class ChunkData to RegionData
jcoomes
parents:
356
diff
changeset
|
579 #define USE_RegionTaskQueueWithOverflow |