Mercurial > hg > truffle
comparison src/share/vm/utilities/taskqueue.hpp @ 907:3ee342e25e57
6821693: 64-bit TaskQueue capacity still too small
6821507: Alignment problem in GC taskqueue
Reviewed-by: tonyp, apetrusenko
author | jcoomes |
---|---|
date | Wed, 05 Aug 2009 12:33:29 -0700 |
parents | df6caf649ff7 |
children | 2c03ce058f55 |
comparison
equal
deleted
inserted
replaced
891:703065c670fa | 907:3ee342e25e57 |
---|---|
20 * CA 95054 USA or visit www.sun.com if you need additional information or | 20 * CA 95054 USA or visit www.sun.com if you need additional information or |
21 * have any questions. | 21 * have any questions. |
22 * | 22 * |
23 */ | 23 */ |
24 | 24 |
25 #ifdef LP64 | |
26 typedef juint TAG_TYPE; | |
27 // for a taskqueue size of 4M | |
28 #define LOG_TASKQ_SIZE 22 | |
29 #else | |
30 typedef jushort TAG_TYPE; | |
31 // for a taskqueue size of 16K | |
32 #define LOG_TASKQ_SIZE 14 | |
33 #endif | |
34 | |
35 class TaskQueueSuper: public CHeapObj { | 25 class TaskQueueSuper: public CHeapObj { |
36 protected: | 26 protected: |
37 // The first free element after the last one pushed (mod _n). | 27 // Internal type for indexing the queue; also used for the tag. |
28 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; | |
29 | |
30 // The first free element after the last one pushed (mod N). | |
38 volatile uint _bottom; | 31 volatile uint _bottom; |
39 | 32 |
40 // log2 of the size of the queue. | 33 enum { |
41 enum SomeProtectedConstants { | 34 N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K |
42 Log_n = LOG_TASKQ_SIZE | 35 MOD_N_MASK = N - 1 // To compute x mod N efficiently. |
43 }; | 36 }; |
44 #undef LOG_TASKQ_SIZE | 37 |
45 | 38 class Age { |
46 // Size of the queue. | 39 public: |
47 uint n() { return (1 << Log_n); } | 40 Age(size_t data = 0) { _data = data; } |
48 // For computing "x mod n" efficiently. | 41 Age(const Age& age) { _data = age._data; } |
49 uint n_mod_mask() { return n() - 1; } | 42 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } |
50 | 43 |
51 struct Age { | 44 Age get() const volatile { return _data; } |
52 TAG_TYPE _top; | 45 void set(Age age) volatile { _data = age._data; } |
53 TAG_TYPE _tag; | 46 |
54 | 47 idx_t top() const volatile { return _fields._top; } |
55 TAG_TYPE tag() const { return _tag; } | 48 idx_t tag() const volatile { return _fields._tag; } |
56 TAG_TYPE top() const { return _top; } | 49 |
57 | 50 // Increment top; if it wraps, increment tag also. |
58 Age() { _tag = 0; _top = 0; } | 51 void increment() { |
59 | 52 _fields._top = increment_index(_fields._top); |
60 friend bool operator ==(const Age& a1, const Age& a2) { | 53 if (_fields._top == 0) ++_fields._tag; |
61 return a1.tag() == a2.tag() && a1.top() == a2.top(); | |
62 } | 54 } |
55 | |
56 Age cmpxchg(const Age new_age, const Age old_age) volatile { | |
57 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, | |
58 (volatile intptr_t *)&_data, | |
59 (intptr_t)old_age._data); | |
60 } | |
61 | |
62 bool operator ==(const Age& other) const { return _data == other._data; } | |
63 | |
64 private: | |
65 struct fields { | |
66 idx_t _top; | |
67 idx_t _tag; | |
68 }; | |
69 union { | |
70 size_t _data; | |
71 fields _fields; | |
72 }; | |
63 }; | 73 }; |
64 Age _age; | 74 |
65 // These make sure we do single atomic reads and writes. | 75 volatile Age _age; |
66 Age get_age() { | 76 |
67 uint res = *(volatile uint*)(&_age); | 77 // These both operate mod N. |
68 return *(Age*)(&res); | 78 static uint increment_index(uint ind) { |
69 } | 79 return (ind + 1) & MOD_N_MASK; |
70 void set_age(Age a) { | 80 } |
71 *(volatile uint*)(&_age) = *(uint*)(&a); | 81 static uint decrement_index(uint ind) { |
72 } | 82 return (ind - 1) & MOD_N_MASK; |
73 | 83 } |
74 TAG_TYPE get_top() { | 84 |
75 return get_age().top(); | 85 // Returns a number in the range [0..N). If the result is "N-1", it should be |
76 } | 86 // interpreted as 0. |
77 | |
78 // These both operate mod _n. | |
79 uint increment_index(uint ind) { | |
80 return (ind + 1) & n_mod_mask(); | |
81 } | |
82 uint decrement_index(uint ind) { | |
83 return (ind - 1) & n_mod_mask(); | |
84 } | |
85 | |
86 // Returns a number in the range [0.._n). If the result is "n-1", it | |
87 // should be interpreted as 0. | |
88 uint dirty_size(uint bot, uint top) { | 87 uint dirty_size(uint bot, uint top) { |
89 return ((int)bot - (int)top) & n_mod_mask(); | 88 return (bot - top) & MOD_N_MASK; |
90 } | 89 } |
91 | 90 |
92 // Returns the size corresponding to the given "bot" and "top". | 91 // Returns the size corresponding to the given "bot" and "top". |
93 uint size(uint bot, uint top) { | 92 uint size(uint bot, uint top) { |
94 uint sz = dirty_size(bot, top); | 93 uint sz = dirty_size(bot, top); |
95 // Has the queue "wrapped", so that bottom is less than top? | 94 // Has the queue "wrapped", so that bottom is less than top? There's a |
96 // There's a complicated special case here. A pair of threads could | 95 // complicated special case here. A pair of threads could perform pop_local |
97 // perform pop_local and pop_global operations concurrently, starting | 96 // and pop_global operations concurrently, starting from a state in which |
98 // from a state in which _bottom == _top+1. The pop_local could | 97 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, |
99 // succeed in decrementing _bottom, and the pop_global in incrementing | 98 // and the pop_global in incrementing _top (in which case the pop_global |
100 // _top (in which case the pop_global will be awarded the contested | 99 // will be awarded the contested queue element.) The resulting state must |
101 // queue element.) The resulting state must be interpreted as an empty | 100 // be interpreted as an empty queue. (We only need to worry about one such |
102 // queue. (We only need to worry about one such event: only the queue | 101 // event: only the queue owner performs pop_local's, and several concurrent |
103 // owner performs pop_local's, and several concurrent threads | 102 // threads attempting to perform the pop_global will all perform the same |
104 // attempting to perform the pop_global will all perform the same CAS, | 103 // CAS, and only one can succeed.) Any stealing thread that reads after |
105 // and only one can succeed. Any stealing thread that reads after | 104 // either the increment or decrement will see an empty queue, and will not |
106 // either the increment or decrement will see an empty queue, and will | 105 // join the competitors. The "sz == -1 || sz == N-1" state will not be |
107 // not join the competitors. The "sz == -1 || sz == _n-1" state will | 106 // modified by concurrent queues, so the owner thread can reset the state to |
108 // not be modified by concurrent queues, so the owner thread can reset | 107 // _bottom == top so subsequent pushes will be performed normally. |
109 // the state to _bottom == top so subsequent pushes will be performed | 108 return (sz == N - 1) ? 0 : sz; |
110 // normally. | |
111 if (sz == (n()-1)) return 0; | |
112 else return sz; | |
113 } | 109 } |
114 | 110 |
115 public: | 111 public: |
116 TaskQueueSuper() : _bottom(0), _age() {} | 112 TaskQueueSuper() : _bottom(0), _age() {} |
117 | 113 |
120 | 116 |
121 // Return an estimate of the number of elements in the queue. | 117 // Return an estimate of the number of elements in the queue. |
122 // The "careful" version admits the possibility of pop_local/pop_global | 118 // The "careful" version admits the possibility of pop_local/pop_global |
123 // races. | 119 // races. |
124 uint size() { | 120 uint size() { |
125 return size(_bottom, get_top()); | 121 return size(_bottom, _age.top()); |
126 } | 122 } |
127 | 123 |
128 uint dirty_size() { | 124 uint dirty_size() { |
129 return dirty_size(_bottom, get_top()); | 125 return dirty_size(_bottom, _age.top()); |
130 } | 126 } |
131 | 127 |
132 void set_empty() { | 128 void set_empty() { |
133 _bottom = 0; | 129 _bottom = 0; |
134 _age = Age(); | 130 _age.set(0); |
135 } | 131 } |
136 | 132 |
137 // Maximum number of elements allowed in the queue. This is two less | 133 // Maximum number of elements allowed in the queue. This is two less |
138 // than the actual queue size, for somewhat complicated reasons. | 134 // than the actual queue size, for somewhat complicated reasons. |
139 uint max_elems() { return n() - 2; } | 135 uint max_elems() { return N - 2; } |
140 | |
141 }; | 136 }; |
142 | 137 |
143 template<class E> class GenericTaskQueue: public TaskQueueSuper { | 138 template<class E> class GenericTaskQueue: public TaskQueueSuper { |
144 private: | 139 private: |
145 // Slow paths for push, pop_local. (pop_global has no fast path.) | 140 // Slow paths for push, pop_local. (pop_global has no fast path.) |
177 volatile E* _elems; | 172 volatile E* _elems; |
178 }; | 173 }; |
179 | 174 |
180 template<class E> | 175 template<class E> |
181 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { | 176 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { |
182 assert(sizeof(Age) == sizeof(int), "Depends on this."); | 177 assert(sizeof(Age) == sizeof(size_t), "Depends on this."); |
183 } | 178 } |
184 | 179 |
185 template<class E> | 180 template<class E> |
186 void GenericTaskQueue<E>::initialize() { | 181 void GenericTaskQueue<E>::initialize() { |
187 _elems = NEW_C_HEAP_ARRAY(E, n()); | 182 _elems = NEW_C_HEAP_ARRAY(E, N); |
188 guarantee(_elems != NULL, "Allocation failed."); | 183 guarantee(_elems != NULL, "Allocation failed."); |
189 } | 184 } |
190 | 185 |
191 template<class E> | 186 template<class E> |
192 void GenericTaskQueue<E>::oops_do(OopClosure* f) { | 187 void GenericTaskQueue<E>::oops_do(OopClosure* f) { |
206 } | 201 } |
207 | 202 |
208 | 203 |
209 template<class E> | 204 template<class E> |
210 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { | 205 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { |
211 if (dirty_n_elems == n() - 1) { | 206 if (dirty_n_elems == N - 1) { |
212 // Actually means 0, so do the push. | 207 // Actually means 0, so do the push. |
213 uint localBot = _bottom; | 208 uint localBot = _bottom; |
214 _elems[localBot] = t; | 209 _elems[localBot] = t; |
215 _bottom = increment_index(localBot); | 210 _bottom = increment_index(localBot); |
216 return true; | 211 return true; |
217 } else | 212 } |
218 return false; | 213 return false; |
219 } | 214 } |
220 | 215 |
221 template<class E> | 216 template<class E> |
222 bool GenericTaskQueue<E>:: | 217 bool GenericTaskQueue<E>:: |
223 pop_local_slow(uint localBot, Age oldAge) { | 218 pop_local_slow(uint localBot, Age oldAge) { |
228 // must also increment "tag" because of the case where "bottom == 1", | 223 // must also increment "tag" because of the case where "bottom == 1", |
229 // "top == 0". A pop_global could read the queue element in that case, | 224 // "top == 0". A pop_global could read the queue element in that case, |
230 // then have the owner thread do a pop followed by another push. Without | 225 // then have the owner thread do a pop followed by another push. Without |
231 // the incrementing of "tag", the pop_global's CAS could succeed, | 226 // the incrementing of "tag", the pop_global's CAS could succeed, |
232 // allowing it to believe it has claimed the stale element.) | 227 // allowing it to believe it has claimed the stale element.) |
233 Age newAge; | 228 Age newAge((idx_t)localBot, oldAge.tag() + 1); |
234 newAge._top = localBot; | |
235 newAge._tag = oldAge.tag() + 1; | |
236 // Perhaps a competing pop_global has already incremented "top", in which | 229 // Perhaps a competing pop_global has already incremented "top", in which |
237 // case it wins the element. | 230 // case it wins the element. |
238 if (localBot == oldAge.top()) { | 231 if (localBot == oldAge.top()) { |
239 Age tempAge; | |
240 // No competing pop_global has yet incremented "top"; we'll try to | 232 // No competing pop_global has yet incremented "top"; we'll try to |
241 // install new_age, thus claiming the element. | 233 // install new_age, thus claiming the element. |
242 assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit."); | 234 Age tempAge = _age.cmpxchg(newAge, oldAge); |
243 *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); | |
244 if (tempAge == oldAge) { | 235 if (tempAge == oldAge) { |
245 // We win. | 236 // We win. |
246 assert(dirty_size(localBot, get_top()) != n() - 1, | 237 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
247 "Shouldn't be possible..."); | |
248 return true; | 238 return true; |
249 } | 239 } |
250 } | 240 } |
251 // We fail; a completing pop_global gets the element. But the queue is | 241 // We lose; a completing pop_global gets the element. But the queue is empty |
252 // empty (and top is greater than bottom.) Fix this representation of | 242 // and top is greater than bottom. Fix this representation of the empty queue |
253 // the empty queue to become the canonical one. | 243 // to become the canonical one. |
254 set_age(newAge); | 244 _age.set(newAge); |
255 assert(dirty_size(localBot, get_top()) != n() - 1, | 245 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); |
256 "Shouldn't be possible..."); | |
257 return false; | 246 return false; |
258 } | 247 } |
259 | 248 |
260 template<class E> | 249 template<class E> |
261 bool GenericTaskQueue<E>::pop_global(E& t) { | 250 bool GenericTaskQueue<E>::pop_global(E& t) { |
262 Age newAge; | 251 Age oldAge = _age.get(); |
263 Age oldAge = get_age(); | |
264 uint localBot = _bottom; | 252 uint localBot = _bottom; |
265 uint n_elems = size(localBot, oldAge.top()); | 253 uint n_elems = size(localBot, oldAge.top()); |
266 if (n_elems == 0) { | 254 if (n_elems == 0) { |
267 return false; | 255 return false; |
268 } | 256 } |
257 | |
269 t = _elems[oldAge.top()]; | 258 t = _elems[oldAge.top()]; |
270 newAge = oldAge; | 259 Age newAge(oldAge); |
271 newAge._top = increment_index(newAge.top()); | 260 newAge.increment(); |
272 if ( newAge._top == 0 ) newAge._tag++; | 261 Age resAge = _age.cmpxchg(newAge, oldAge); |
273 Age resAge; | 262 |
274 *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); | |
275 // Note that using "_bottom" here might fail, since a pop_local might | 263 // Note that using "_bottom" here might fail, since a pop_local might |
276 // have decremented it. | 264 // have decremented it. |
277 assert(dirty_size(localBot, newAge._top) != n() - 1, | 265 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); |
278 "Shouldn't be possible..."); | 266 return resAge == oldAge; |
279 return (resAge == oldAge); | |
280 } | 267 } |
281 | 268 |
282 template<class E> | 269 template<class E> |
283 GenericTaskQueue<E>::~GenericTaskQueue() { | 270 GenericTaskQueue<E>::~GenericTaskQueue() { |
284 FREE_C_HEAP_ARRAY(E, _elems); | 271 FREE_C_HEAP_ARRAY(E, _elems); |
457 // so the global task is not complete. | 444 // so the global task is not complete. |
458 bool offer_termination() { | 445 bool offer_termination() { |
459 return offer_termination(NULL); | 446 return offer_termination(NULL); |
460 } | 447 } |
461 | 448 |
462 // As above, but it also terminates of the should_exit_termination() | 449 // As above, but it also terminates if the should_exit_termination() |
463 // method of the terminator parameter returns true. If terminator is | 450 // method of the terminator parameter returns true. If terminator is |
464 // NULL, then it is ignored. | 451 // NULL, then it is ignored. |
465 bool offer_termination(TerminatorTerminator* terminator); | 452 bool offer_termination(TerminatorTerminator* terminator); |
466 | 453 |
467 // Reset the terminator, so that it may be reused again. | 454 // Reset the terminator, so that it may be reused again. |
490 } else { | 477 } else { |
491 return false; | 478 return false; |
492 } | 479 } |
493 #else | 480 #else |
494 uint localBot = _bottom; | 481 uint localBot = _bottom; |
495 assert((localBot >= 0) && (localBot < n()), "_bottom out of range."); | 482 assert((localBot >= 0) && (localBot < N), "_bottom out of range."); |
496 TAG_TYPE top = get_top(); | 483 idx_t top = _age.top(); |
497 uint dirty_n_elems = dirty_size(localBot, top); | 484 uint dirty_n_elems = dirty_size(localBot, top); |
498 assert((dirty_n_elems >= 0) && (dirty_n_elems < n()), | 485 assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range."); |
499 "n_elems out of range."); | |
500 if (dirty_n_elems < max_elems()) { | 486 if (dirty_n_elems < max_elems()) { |
501 _elems[localBot] = t; | 487 _elems[localBot] = t; |
502 _bottom = increment_index(localBot); | 488 _bottom = increment_index(localBot); |
503 return true; | 489 return true; |
504 } else { | 490 } else { |
515 t = _elems[localBot]; | 501 t = _elems[localBot]; |
516 _bottom = localBot; | 502 _bottom = localBot; |
517 return true; | 503 return true; |
518 #else | 504 #else |
519 uint localBot = _bottom; | 505 uint localBot = _bottom; |
520 // This value cannot be n-1. That can only occur as a result of | 506 // This value cannot be N-1. That can only occur as a result of |
521 // the assignment to bottom in this method. If it does, this method | 507 // the assignment to bottom in this method. If it does, this method |
522 // resets the size( to 0 before the next call (which is sequential, | 508 // resets the size( to 0 before the next call (which is sequential, |
523 // since this is pop_local.) | 509 // since this is pop_local.) |
524 uint dirty_n_elems = dirty_size(localBot, get_top()); | 510 uint dirty_n_elems = dirty_size(localBot, _age.top()); |
525 assert(dirty_n_elems != n() - 1, "Shouldn't be possible..."); | 511 assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); |
526 if (dirty_n_elems == 0) return false; | 512 if (dirty_n_elems == 0) return false; |
527 localBot = decrement_index(localBot); | 513 localBot = decrement_index(localBot); |
528 _bottom = localBot; | 514 _bottom = localBot; |
529 // This is necessary to prevent any read below from being reordered | 515 // This is necessary to prevent any read below from being reordered |
530 // before the store just above. | 516 // before the store just above. |
532 t = _elems[localBot]; | 518 t = _elems[localBot]; |
533 // This is a second read of "age"; the "size()" above is the first. | 519 // This is a second read of "age"; the "size()" above is the first. |
534 // If there's still at least one element in the queue, based on the | 520 // If there's still at least one element in the queue, based on the |
535 // "_bottom" and "age" we've read, then there can be no interference with | 521 // "_bottom" and "age" we've read, then there can be no interference with |
536 // a "pop_global" operation, and we're done. | 522 // a "pop_global" operation, and we're done. |
537 TAG_TYPE tp = get_top(); // XXX | 523 idx_t tp = _age.top(); // XXX |
538 if (size(localBot, tp) > 0) { | 524 if (size(localBot, tp) > 0) { |
539 assert(dirty_size(localBot, tp) != n() - 1, | 525 assert(dirty_size(localBot, tp) != N - 1, "sanity"); |
540 "Shouldn't be possible..."); | |
541 return true; | 526 return true; |
542 } else { | 527 } else { |
543 // Otherwise, the queue contained exactly one element; we take the slow | 528 // Otherwise, the queue contained exactly one element; we take the slow |
544 // path. | 529 // path. |
545 return pop_local_slow(localBot, get_age()); | 530 return pop_local_slow(localBot, _age.get()); |
546 } | 531 } |
547 #endif | 532 #endif |
548 } | 533 } |
549 | 534 |
550 typedef oop Task; | 535 typedef oop Task; |