comparison src/share/vm/utilities/taskqueue.hpp @ 907:3ee342e25e57

6821693: 64-bit TaskQueue capacity still too small 6821507: Alignment problem in GC taskqueue Reviewed-by: tonyp, apetrusenko
author jcoomes
date Wed, 05 Aug 2009 12:33:29 -0700
parents df6caf649ff7
children 2c03ce058f55
comparison
equal deleted inserted replaced
891:703065c670fa 907:3ee342e25e57
20 * CA 95054 USA or visit www.sun.com if you need additional information or 20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions. 21 * have any questions.
22 * 22 *
23 */ 23 */
24 24
25 #ifdef LP64
26 typedef juint TAG_TYPE;
27 // for a taskqueue size of 4M
28 #define LOG_TASKQ_SIZE 22
29 #else
30 typedef jushort TAG_TYPE;
31 // for a taskqueue size of 16K
32 #define LOG_TASKQ_SIZE 14
33 #endif
34
35 class TaskQueueSuper: public CHeapObj { 25 class TaskQueueSuper: public CHeapObj {
36 protected: 26 protected:
37 // The first free element after the last one pushed (mod _n). 27 // Internal type for indexing the queue; also used for the tag.
28 typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
29
30 // The first free element after the last one pushed (mod N).
38 volatile uint _bottom; 31 volatile uint _bottom;
39 32
40 // log2 of the size of the queue. 33 enum {
41 enum SomeProtectedConstants { 34 N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K
42 Log_n = LOG_TASKQ_SIZE 35 MOD_N_MASK = N - 1 // To compute x mod N efficiently.
43 }; 36 };
44 #undef LOG_TASKQ_SIZE 37
45 38 class Age {
46 // Size of the queue. 39 public:
47 uint n() { return (1 << Log_n); } 40 Age(size_t data = 0) { _data = data; }
48 // For computing "x mod n" efficiently. 41 Age(const Age& age) { _data = age._data; }
49 uint n_mod_mask() { return n() - 1; } 42 Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; }
50 43
51 struct Age { 44 Age get() const volatile { return _data; }
52 TAG_TYPE _top; 45 void set(Age age) volatile { _data = age._data; }
53 TAG_TYPE _tag; 46
54 47 idx_t top() const volatile { return _fields._top; }
55 TAG_TYPE tag() const { return _tag; } 48 idx_t tag() const volatile { return _fields._tag; }
56 TAG_TYPE top() const { return _top; } 49
57 50 // Increment top; if it wraps, increment tag also.
58 Age() { _tag = 0; _top = 0; } 51 void increment() {
59 52 _fields._top = increment_index(_fields._top);
60 friend bool operator ==(const Age& a1, const Age& a2) { 53 if (_fields._top == 0) ++_fields._tag;
61 return a1.tag() == a2.tag() && a1.top() == a2.top();
62 } 54 }
55
56 Age cmpxchg(const Age new_age, const Age old_age) volatile {
57 return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data,
58 (volatile intptr_t *)&_data,
59 (intptr_t)old_age._data);
60 }
61
62 bool operator ==(const Age& other) const { return _data == other._data; }
63
64 private:
65 struct fields {
66 idx_t _top;
67 idx_t _tag;
68 };
69 union {
70 size_t _data;
71 fields _fields;
72 };
63 }; 73 };
64 Age _age; 74
65 // These make sure we do single atomic reads and writes. 75 volatile Age _age;
66 Age get_age() { 76
67 uint res = *(volatile uint*)(&_age); 77 // These both operate mod N.
68 return *(Age*)(&res); 78 static uint increment_index(uint ind) {
69 } 79 return (ind + 1) & MOD_N_MASK;
70 void set_age(Age a) { 80 }
71 *(volatile uint*)(&_age) = *(uint*)(&a); 81 static uint decrement_index(uint ind) {
72 } 82 return (ind - 1) & MOD_N_MASK;
73 83 }
74 TAG_TYPE get_top() { 84
75 return get_age().top(); 85 // Returns a number in the range [0..N). If the result is "N-1", it should be
76 } 86 // interpreted as 0.
77
78 // These both operate mod _n.
79 uint increment_index(uint ind) {
80 return (ind + 1) & n_mod_mask();
81 }
82 uint decrement_index(uint ind) {
83 return (ind - 1) & n_mod_mask();
84 }
85
86 // Returns a number in the range [0.._n). If the result is "n-1", it
87 // should be interpreted as 0.
88 uint dirty_size(uint bot, uint top) { 87 uint dirty_size(uint bot, uint top) {
89 return ((int)bot - (int)top) & n_mod_mask(); 88 return (bot - top) & MOD_N_MASK;
90 } 89 }
91 90
92 // Returns the size corresponding to the given "bot" and "top". 91 // Returns the size corresponding to the given "bot" and "top".
93 uint size(uint bot, uint top) { 92 uint size(uint bot, uint top) {
94 uint sz = dirty_size(bot, top); 93 uint sz = dirty_size(bot, top);
95 // Has the queue "wrapped", so that bottom is less than top? 94 // Has the queue "wrapped", so that bottom is less than top? There's a
96 // There's a complicated special case here. A pair of threads could 95 // complicated special case here. A pair of threads could perform pop_local
97 // perform pop_local and pop_global operations concurrently, starting 96 // and pop_global operations concurrently, starting from a state in which
98 // from a state in which _bottom == _top+1. The pop_local could 97 // _bottom == _top+1. The pop_local could succeed in decrementing _bottom,
99 // succeed in decrementing _bottom, and the pop_global in incrementing 98 // and the pop_global in incrementing _top (in which case the pop_global
100 // _top (in which case the pop_global will be awarded the contested 99 // will be awarded the contested queue element.) The resulting state must
101 // queue element.) The resulting state must be interpreted as an empty 100 // be interpreted as an empty queue. (We only need to worry about one such
102 // queue. (We only need to worry about one such event: only the queue 101 // event: only the queue owner performs pop_local's, and several concurrent
103 // owner performs pop_local's, and several concurrent threads 102 // threads attempting to perform the pop_global will all perform the same
104 // attempting to perform the pop_global will all perform the same CAS, 103 // CAS, and only one can succeed.) Any stealing thread that reads after
105 // and only one can succeed. Any stealing thread that reads after 104 // either the increment or decrement will see an empty queue, and will not
106 // either the increment or decrement will see an empty queue, and will 105 // join the competitors. The "sz == -1 || sz == N-1" state will not be
107 // not join the competitors. The "sz == -1 || sz == _n-1" state will 106 // modified by concurrent queues, so the owner thread can reset the state to
108 // not be modified by concurrent queues, so the owner thread can reset 107 // _bottom == top so subsequent pushes will be performed normally.
109 // the state to _bottom == top so subsequent pushes will be performed 108 return (sz == N - 1) ? 0 : sz;
110 // normally.
111 if (sz == (n()-1)) return 0;
112 else return sz;
113 } 109 }
114 110
115 public: 111 public:
116 TaskQueueSuper() : _bottom(0), _age() {} 112 TaskQueueSuper() : _bottom(0), _age() {}
117 113
120 116
121 // Return an estimate of the number of elements in the queue. 117 // Return an estimate of the number of elements in the queue.
122 // The "careful" version admits the possibility of pop_local/pop_global 118 // The "careful" version admits the possibility of pop_local/pop_global
123 // races. 119 // races.
124 uint size() { 120 uint size() {
125 return size(_bottom, get_top()); 121 return size(_bottom, _age.top());
126 } 122 }
127 123
128 uint dirty_size() { 124 uint dirty_size() {
129 return dirty_size(_bottom, get_top()); 125 return dirty_size(_bottom, _age.top());
130 } 126 }
131 127
132 void set_empty() { 128 void set_empty() {
133 _bottom = 0; 129 _bottom = 0;
134 _age = Age(); 130 _age.set(0);
135 } 131 }
136 132
137 // Maximum number of elements allowed in the queue. This is two less 133 // Maximum number of elements allowed in the queue. This is two less
138 // than the actual queue size, for somewhat complicated reasons. 134 // than the actual queue size, for somewhat complicated reasons.
139 uint max_elems() { return n() - 2; } 135 uint max_elems() { return N - 2; }
140
141 }; 136 };
142 137
143 template<class E> class GenericTaskQueue: public TaskQueueSuper { 138 template<class E> class GenericTaskQueue: public TaskQueueSuper {
144 private: 139 private:
145 // Slow paths for push, pop_local. (pop_global has no fast path.) 140 // Slow paths for push, pop_local. (pop_global has no fast path.)
177 volatile E* _elems; 172 volatile E* _elems;
178 }; 173 };
179 174
180 template<class E> 175 template<class E>
181 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() { 176 GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
182 assert(sizeof(Age) == sizeof(int), "Depends on this."); 177 assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
183 } 178 }
184 179
185 template<class E> 180 template<class E>
186 void GenericTaskQueue<E>::initialize() { 181 void GenericTaskQueue<E>::initialize() {
187 _elems = NEW_C_HEAP_ARRAY(E, n()); 182 _elems = NEW_C_HEAP_ARRAY(E, N);
188 guarantee(_elems != NULL, "Allocation failed."); 183 guarantee(_elems != NULL, "Allocation failed.");
189 } 184 }
190 185
191 template<class E> 186 template<class E>
192 void GenericTaskQueue<E>::oops_do(OopClosure* f) { 187 void GenericTaskQueue<E>::oops_do(OopClosure* f) {
206 } 201 }
207 202
208 203
209 template<class E> 204 template<class E>
210 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) { 205 bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
211 if (dirty_n_elems == n() - 1) { 206 if (dirty_n_elems == N - 1) {
212 // Actually means 0, so do the push. 207 // Actually means 0, so do the push.
213 uint localBot = _bottom; 208 uint localBot = _bottom;
214 _elems[localBot] = t; 209 _elems[localBot] = t;
215 _bottom = increment_index(localBot); 210 _bottom = increment_index(localBot);
216 return true; 211 return true;
217 } else 212 }
218 return false; 213 return false;
219 } 214 }
220 215
221 template<class E> 216 template<class E>
222 bool GenericTaskQueue<E>:: 217 bool GenericTaskQueue<E>::
223 pop_local_slow(uint localBot, Age oldAge) { 218 pop_local_slow(uint localBot, Age oldAge) {
228 // must also increment "tag" because of the case where "bottom == 1", 223 // must also increment "tag" because of the case where "bottom == 1",
229 // "top == 0". A pop_global could read the queue element in that case, 224 // "top == 0". A pop_global could read the queue element in that case,
230 // then have the owner thread do a pop followed by another push. Without 225 // then have the owner thread do a pop followed by another push. Without
231 // the incrementing of "tag", the pop_global's CAS could succeed, 226 // the incrementing of "tag", the pop_global's CAS could succeed,
232 // allowing it to believe it has claimed the stale element.) 227 // allowing it to believe it has claimed the stale element.)
233 Age newAge; 228 Age newAge((idx_t)localBot, oldAge.tag() + 1);
234 newAge._top = localBot;
235 newAge._tag = oldAge.tag() + 1;
236 // Perhaps a competing pop_global has already incremented "top", in which 229 // Perhaps a competing pop_global has already incremented "top", in which
237 // case it wins the element. 230 // case it wins the element.
238 if (localBot == oldAge.top()) { 231 if (localBot == oldAge.top()) {
239 Age tempAge;
240 // No competing pop_global has yet incremented "top"; we'll try to 232 // No competing pop_global has yet incremented "top"; we'll try to
241 // install new_age, thus claiming the element. 233 // install new_age, thus claiming the element.
242 assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit."); 234 Age tempAge = _age.cmpxchg(newAge, oldAge);
243 *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
244 if (tempAge == oldAge) { 235 if (tempAge == oldAge) {
245 // We win. 236 // We win.
246 assert(dirty_size(localBot, get_top()) != n() - 1, 237 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
247 "Shouldn't be possible...");
248 return true; 238 return true;
249 } 239 }
250 } 240 }
251 // We fail; a completing pop_global gets the element. But the queue is 241 // We lose; a completing pop_global gets the element. But the queue is empty
252 // empty (and top is greater than bottom.) Fix this representation of 242 // and top is greater than bottom. Fix this representation of the empty queue
253 // the empty queue to become the canonical one. 243 // to become the canonical one.
254 set_age(newAge); 244 _age.set(newAge);
255 assert(dirty_size(localBot, get_top()) != n() - 1, 245 assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
256 "Shouldn't be possible...");
257 return false; 246 return false;
258 } 247 }
259 248
260 template<class E> 249 template<class E>
261 bool GenericTaskQueue<E>::pop_global(E& t) { 250 bool GenericTaskQueue<E>::pop_global(E& t) {
262 Age newAge; 251 Age oldAge = _age.get();
263 Age oldAge = get_age();
264 uint localBot = _bottom; 252 uint localBot = _bottom;
265 uint n_elems = size(localBot, oldAge.top()); 253 uint n_elems = size(localBot, oldAge.top());
266 if (n_elems == 0) { 254 if (n_elems == 0) {
267 return false; 255 return false;
268 } 256 }
257
269 t = _elems[oldAge.top()]; 258 t = _elems[oldAge.top()];
270 newAge = oldAge; 259 Age newAge(oldAge);
271 newAge._top = increment_index(newAge.top()); 260 newAge.increment();
272 if ( newAge._top == 0 ) newAge._tag++; 261 Age resAge = _age.cmpxchg(newAge, oldAge);
273 Age resAge; 262
274 *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
275 // Note that using "_bottom" here might fail, since a pop_local might 263 // Note that using "_bottom" here might fail, since a pop_local might
276 // have decremented it. 264 // have decremented it.
277 assert(dirty_size(localBot, newAge._top) != n() - 1, 265 assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
278 "Shouldn't be possible..."); 266 return resAge == oldAge;
279 return (resAge == oldAge);
280 } 267 }
281 268
282 template<class E> 269 template<class E>
283 GenericTaskQueue<E>::~GenericTaskQueue() { 270 GenericTaskQueue<E>::~GenericTaskQueue() {
284 FREE_C_HEAP_ARRAY(E, _elems); 271 FREE_C_HEAP_ARRAY(E, _elems);
457 // so the global task is not complete. 444 // so the global task is not complete.
458 bool offer_termination() { 445 bool offer_termination() {
459 return offer_termination(NULL); 446 return offer_termination(NULL);
460 } 447 }
461 448
462 // As above, but it also terminates of the should_exit_termination() 449 // As above, but it also terminates if the should_exit_termination()
463 // method of the terminator parameter returns true. If terminator is 450 // method of the terminator parameter returns true. If terminator is
464 // NULL, then it is ignored. 451 // NULL, then it is ignored.
465 bool offer_termination(TerminatorTerminator* terminator); 452 bool offer_termination(TerminatorTerminator* terminator);
466 453
467 // Reset the terminator, so that it may be reused again. 454 // Reset the terminator, so that it may be reused again.
490 } else { 477 } else {
491 return false; 478 return false;
492 } 479 }
493 #else 480 #else
494 uint localBot = _bottom; 481 uint localBot = _bottom;
495 assert((localBot >= 0) && (localBot < n()), "_bottom out of range."); 482 assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
496 TAG_TYPE top = get_top(); 483 idx_t top = _age.top();
497 uint dirty_n_elems = dirty_size(localBot, top); 484 uint dirty_n_elems = dirty_size(localBot, top);
498 assert((dirty_n_elems >= 0) && (dirty_n_elems < n()), 485 assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range.");
499 "n_elems out of range.");
500 if (dirty_n_elems < max_elems()) { 486 if (dirty_n_elems < max_elems()) {
501 _elems[localBot] = t; 487 _elems[localBot] = t;
502 _bottom = increment_index(localBot); 488 _bottom = increment_index(localBot);
503 return true; 489 return true;
504 } else { 490 } else {
515 t = _elems[localBot]; 501 t = _elems[localBot];
516 _bottom = localBot; 502 _bottom = localBot;
517 return true; 503 return true;
518 #else 504 #else
519 uint localBot = _bottom; 505 uint localBot = _bottom;
520 // This value cannot be n-1. That can only occur as a result of 506 // This value cannot be N-1. That can only occur as a result of
521 // the assignment to bottom in this method. If it does, this method 507 // the assignment to bottom in this method. If it does, this method
522 // resets the size( to 0 before the next call (which is sequential, 508 // resets the size( to 0 before the next call (which is sequential,
523 // since this is pop_local.) 509 // since this is pop_local.)
524 uint dirty_n_elems = dirty_size(localBot, get_top()); 510 uint dirty_n_elems = dirty_size(localBot, _age.top());
525 assert(dirty_n_elems != n() - 1, "Shouldn't be possible..."); 511 assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
526 if (dirty_n_elems == 0) return false; 512 if (dirty_n_elems == 0) return false;
527 localBot = decrement_index(localBot); 513 localBot = decrement_index(localBot);
528 _bottom = localBot; 514 _bottom = localBot;
529 // This is necessary to prevent any read below from being reordered 515 // This is necessary to prevent any read below from being reordered
530 // before the store just above. 516 // before the store just above.
532 t = _elems[localBot]; 518 t = _elems[localBot];
533 // This is a second read of "age"; the "size()" above is the first. 519 // This is a second read of "age"; the "size()" above is the first.
534 // If there's still at least one element in the queue, based on the 520 // If there's still at least one element in the queue, based on the
535 // "_bottom" and "age" we've read, then there can be no interference with 521 // "_bottom" and "age" we've read, then there can be no interference with
536 // a "pop_global" operation, and we're done. 522 // a "pop_global" operation, and we're done.
537 TAG_TYPE tp = get_top(); // XXX 523 idx_t tp = _age.top(); // XXX
538 if (size(localBot, tp) > 0) { 524 if (size(localBot, tp) > 0) {
539 assert(dirty_size(localBot, tp) != n() - 1, 525 assert(dirty_size(localBot, tp) != N - 1, "sanity");
540 "Shouldn't be possible...");
541 return true; 526 return true;
542 } else { 527 } else {
543 // Otherwise, the queue contained exactly one element; we take the slow 528 // Otherwise, the queue contained exactly one element; we take the slow
544 // path. 529 // path.
545 return pop_local_slow(localBot, get_age()); 530 return pop_local_slow(localBot, _age.get());
546 } 531 }
547 #endif 532 #endif
548 } 533 }
549 534
550 typedef oop Task; 535 typedef oop Task;