Mercurial > hg > graal-compiler
comparison src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @ 679:cea947c8a988
6819891: ParNew: Fix work queue overflow code to deal correctly with +UseCompressedOops
Summary: When using compressed oops, rather than chaining the overflowed grey objects' pre-images through their klass words, we use GC-worker thread-local overflow stacks.
Reviewed-by: jcoomes, jmasa
author | ysr |
---|---|
date | Sat, 28 Mar 2009 15:47:29 -0700 |
parents | 0fbdb4381b99 |
children | becb17ad5e51 |
comparison legend:
- equal
- deleted
- inserted
- replaced
678:4ac7d97e6101 | 679:cea947c8a988 |
---|---|
34 Generation* old_gen_, | 34 Generation* old_gen_, |
35 int thread_num_, | 35 int thread_num_, |
36 ObjToScanQueueSet* work_queue_set_, | 36 ObjToScanQueueSet* work_queue_set_, |
37 size_t desired_plab_sz_, | 37 size_t desired_plab_sz_, |
38 ParallelTaskTerminator& term_) : | 38 ParallelTaskTerminator& term_) : |
39 _to_space(to_space_), _old_gen(old_gen_), _thread_num(thread_num_), | 39 _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_), |
40 _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), | 40 _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), |
41 _ageTable(false), // false ==> not the global age table, no perf data. | 41 _ageTable(false), // false ==> not the global age table, no perf data. |
42 _to_space_alloc_buffer(desired_plab_sz_), | 42 _to_space_alloc_buffer(desired_plab_sz_), |
43 _to_space_closure(gen_, this), _old_gen_closure(gen_, this), | 43 _to_space_closure(gen_, this), _old_gen_closure(gen_, this), |
44 _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this), | 44 _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this), |
55 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); | 55 (ChunkArray*) old_gen()->get_data_recorder(thread_num()); |
56 _hash_seed = 17; // Might want to take time-based random value. | 56 _hash_seed = 17; // Might want to take time-based random value. |
57 _start = os::elapsedTime(); | 57 _start = os::elapsedTime(); |
58 _old_gen_closure.set_generation(old_gen_); | 58 _old_gen_closure.set_generation(old_gen_); |
59 _old_gen_root_closure.set_generation(old_gen_); | 59 _old_gen_root_closure.set_generation(old_gen_); |
60 if (UseCompressedOops) { | |
61 _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true); | |
62 } else { | |
63 _overflow_stack = NULL; | |
64 } | |
60 } | 65 } |
61 #ifdef _MSC_VER | 66 #ifdef _MSC_VER |
62 #pragma warning( pop ) | 67 #pragma warning( pop ) |
63 #endif | 68 #endif |
64 | 69 |
79 | 84 |
80 void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) { | 85 void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) { |
81 assert(old->is_objArray(), "must be obj array"); | 86 assert(old->is_objArray(), "must be obj array"); |
82 assert(old->is_forwarded(), "must be forwarded"); | 87 assert(old->is_forwarded(), "must be forwarded"); |
83 assert(Universe::heap()->is_in_reserved(old), "must be in heap."); | 88 assert(Universe::heap()->is_in_reserved(old), "must be in heap."); |
84 assert(!_old_gen->is_in(old), "must be in young generation."); | 89 assert(!old_gen()->is_in(old), "must be in young generation."); |
85 | 90 |
86 objArrayOop obj = objArrayOop(old->forwardee()); | 91 objArrayOop obj = objArrayOop(old->forwardee()); |
87 // Process ParGCArrayScanChunk elements now | 92 // Process ParGCArrayScanChunk elements now |
88 // and push the remainder back onto queue | 93 // and push the remainder back onto queue |
89 int start = arrayOop(old)->length(); | 94 int start = arrayOop(old)->length(); |
117 } | 122 } |
118 | 123 |
119 | 124 |
120 void ParScanThreadState::trim_queues(int max_size) { | 125 void ParScanThreadState::trim_queues(int max_size) { |
121 ObjToScanQueue* queue = work_queue(); | 126 ObjToScanQueue* queue = work_queue(); |
122 while (queue->size() > (juint)max_size) { | 127 do { |
123 oop obj_to_scan; | 128 while (queue->size() > (juint)max_size) { |
124 if (queue->pop_local(obj_to_scan)) { | 129 oop obj_to_scan; |
125 note_pop(); | 130 if (queue->pop_local(obj_to_scan)) { |
126 | 131 note_pop(); |
127 if ((HeapWord *)obj_to_scan < young_old_boundary()) { | 132 if ((HeapWord *)obj_to_scan < young_old_boundary()) { |
128 if (obj_to_scan->is_objArray() && | 133 if (obj_to_scan->is_objArray() && |
129 obj_to_scan->is_forwarded() && | 134 obj_to_scan->is_forwarded() && |
130 obj_to_scan->forwardee() != obj_to_scan) { | 135 obj_to_scan->forwardee() != obj_to_scan) { |
131 scan_partial_array_and_push_remainder(obj_to_scan); | 136 scan_partial_array_and_push_remainder(obj_to_scan); |
137 } else { | |
138 // object is in to_space | |
139 obj_to_scan->oop_iterate(&_to_space_closure); | |
140 } | |
132 } else { | 141 } else { |
133 // object is in to_space | 142 // object is in old generation |
134 obj_to_scan->oop_iterate(&_to_space_closure); | 143 obj_to_scan->oop_iterate(&_old_gen_closure); |
135 } | 144 } |
136 } else { | |
137 // object is in old generation | |
138 obj_to_scan->oop_iterate(&_old_gen_closure); | |
139 } | 145 } |
140 } | 146 } |
141 } | 147 // For the case of compressed oops, we have a private, non-shared |
148 // overflow stack, so we eagerly drain it so as to more evenly | |
149 // distribute load early. Note: this may be good to do in | |
150 // general rather than delay for the final stealing phase. | |
151 // If applicable, we'll transfer a set of objects over to our | |
152 // work queue, allowing them to be stolen and draining our | |
153 // private overflow stack. | |
154 } while (ParGCTrimOverflow && young_gen()->take_from_overflow_list(this)); | |
155 } | |
156 | |
157 bool ParScanThreadState::take_from_overflow_stack() { | |
158 assert(UseCompressedOops, "Else should not call"); | |
159 assert(young_gen()->overflow_list() == NULL, "Error"); | |
160 ObjToScanQueue* queue = work_queue(); | |
161 GrowableArray<oop>* of_stack = overflow_stack(); | |
162 uint num_overflow_elems = of_stack->length(); | |
163 uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4, | |
164 (juint)ParGCDesiredObjsFromOverflowList), | |
165 num_overflow_elems); | |
166 // Transfer the most recent num_take_elems from the overflow | |
167 // stack to our work queue. | |
168 for (size_t i = 0; i != num_take_elems; i++) { | |
169 oop cur = of_stack->pop(); | |
170 oop obj_to_push = cur->forwardee(); | |
171 assert(Universe::heap()->is_in_reserved(cur), "Should be in heap"); | |
172 assert(!old_gen()->is_in_reserved(cur), "Should be in young gen"); | |
173 assert(Universe::heap()->is_in_reserved(obj_to_push), "Should be in heap"); | |
174 if (should_be_partially_scanned(obj_to_push, cur)) { | |
175 assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned"); | |
176 obj_to_push = cur; | |
177 } | |
178 bool ok = queue->push(obj_to_push); | |
179 assert(ok, "Should have succeeded"); | |
180 } | |
181 assert(young_gen()->overflow_list() == NULL, "Error"); | |
182 return num_take_elems > 0; // was something transferred? | |
183 } | |
184 | |
185 void ParScanThreadState::push_on_overflow_stack(oop p) { | |
186 assert(UseCompressedOops, "Else should not call"); | |
187 overflow_stack()->push(p); | |
188 assert(young_gen()->overflow_list() == NULL, "Error"); | |
142 } | 189 } |
143 | 190 |
144 HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) { | 191 HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) { |
145 | 192 |
146 // Otherwise, if the object is small enough, try to reallocate the | 193 // Otherwise, if the object is small enough, try to reallocate the |
423 // Since this is being done in a separate thread, need new resource | 470 // Since this is being done in a separate thread, need new resource |
424 // and handle marks. | 471 // and handle marks. |
425 ResourceMark rm; | 472 ResourceMark rm; |
426 HandleMark hm; | 473 HandleMark hm; |
427 // We would need multiple old-gen queues otherwise. | 474 // We would need multiple old-gen queues otherwise. |
428 guarantee(gch->n_gens() == 2, | 475 assert(gch->n_gens() == 2, "Par young collection currently only works with one older gen."); |
429 "Par young collection currently only works with one older gen."); | |
430 | 476 |
431 Generation* old_gen = gch->next_gen(_gen); | 477 Generation* old_gen = gch->next_gen(_gen); |
432 | 478 |
433 ParScanThreadState& par_scan_state = _state_set->thread_sate(i); | 479 ParScanThreadState& par_scan_state = _state_set->thread_sate(i); |
434 par_scan_state.set_young_old_boundary(_young_old_boundary); | 480 par_scan_state.set_young_old_boundary(_young_old_boundary); |
1167 return false; | 1213 return false; |
1168 } | 1214 } |
1169 } | 1215 } |
1170 #endif | 1216 #endif |
1171 | 1217 |
1218 // In case we are using compressed oops, we need to be careful. | |
1219 // If the object being pushed is an object array, then its length | |
1220 // field keeps track of the "grey boundary" at which the next | |
1221 // incremental scan will be done (see ParGCArrayScanChunk). | |
1222 // When using compressed oops, this length field is kept in the | |
1223 // lower 32 bits of the erstwhile klass word and cannot be used | |
1224 // for the overflow chaining pointer (OCP below). As such the OCP | |
1225 // would itself need to be compressed into the top 32-bits in this | |
1226 // case. Unfortunately, see below, in the event that we have a | |
1227 // promotion failure, the node to be pushed on the list can be | |
1228 // outside of the Java heap, so the heap-based pointer compression | |
1229 // would not work (we would have potential aliasing between C-heap | |
1230 // and Java-heap pointers). For this reason, when using compressed | |
1231 // oops, we simply use a worker-thread-local, non-shared overflow | |
1232 // list in the form of a growable array, with a slightly different | |
1233 // overflow stack draining strategy. If/when we start using fat | |
1234 // stacks here, we can go back to using (fat) pointer chains | |
1235 // (although some performance comparisons would be useful since | |
1236 // single global lists have their own performance disadvantages | |
1237 // as we were made painfully aware not long ago, see 6786503). | |
1172 #define BUSY (oop(0x1aff1aff)) | 1238 #define BUSY (oop(0x1aff1aff)) |
1173 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { | 1239 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { |
1174 // if the object has been forwarded to itself, then we cannot | 1240 assert(is_in_reserved(from_space_obj), "Should be from this generation"); |
1175 // use the klass pointer for the linked list. Instead we have | 1241 if (UseCompressedOops) { |
1176 // to allocate an oopDesc in the C-Heap and use that for the linked list. | 1242 // In the case of compressed oops, we use a private, not-shared |
1177 // XXX This is horribly inefficient when a promotion failure occurs | 1243 // overflow stack. |
1178 // and should be fixed. XXX FIX ME !!! | 1244 par_scan_state->push_on_overflow_stack(from_space_obj); |
1245 } else { | |
1246 // if the object has been forwarded to itself, then we cannot | |
1247 // use the klass pointer for the linked list. Instead we have | |
1248 // to allocate an oopDesc in the C-Heap and use that for the linked list. | |
1249 // XXX This is horribly inefficient when a promotion failure occurs | |
1250 // and should be fixed. XXX FIX ME !!! | |
1179 #ifndef PRODUCT | 1251 #ifndef PRODUCT |
1180 Atomic::inc_ptr(&_num_par_pushes); | 1252 Atomic::inc_ptr(&_num_par_pushes); |
1181 assert(_num_par_pushes > 0, "Tautology"); | 1253 assert(_num_par_pushes > 0, "Tautology"); |
1182 #endif | 1254 #endif |
1183 if (from_space_obj->forwardee() == from_space_obj) { | 1255 if (from_space_obj->forwardee() == from_space_obj) { |
1184 oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1); | 1256 oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1); |
1185 listhead->forward_to(from_space_obj); | 1257 listhead->forward_to(from_space_obj); |
1186 from_space_obj = listhead; | 1258 from_space_obj = listhead; |
1187 } | 1259 } |
1188 oop observed_overflow_list = _overflow_list; | 1260 oop observed_overflow_list = _overflow_list; |
1189 oop cur_overflow_list; | 1261 oop cur_overflow_list; |
1190 do { | 1262 do { |
1191 cur_overflow_list = observed_overflow_list; | 1263 cur_overflow_list = observed_overflow_list; |
1192 if (cur_overflow_list != BUSY) { | 1264 if (cur_overflow_list != BUSY) { |
1193 from_space_obj->set_klass_to_list_ptr(cur_overflow_list); | 1265 from_space_obj->set_klass_to_list_ptr(cur_overflow_list); |
1194 } else { | 1266 } else { |
1195 from_space_obj->set_klass_to_list_ptr(NULL); | 1267 from_space_obj->set_klass_to_list_ptr(NULL); |
1196 } | 1268 } |
1197 observed_overflow_list = | 1269 observed_overflow_list = |
1198 (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list); | 1270 (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list); |
1199 } while (cur_overflow_list != observed_overflow_list); | 1271 } while (cur_overflow_list != observed_overflow_list); |
1200 } | 1272 } |
1273 } | |
1274 | |
1275 bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { | |
1276 bool res; | |
1277 | |
1278 if (UseCompressedOops) { | |
1279 res = par_scan_state->take_from_overflow_stack(); | |
1280 } else { | |
1281 res = take_from_overflow_list_work(par_scan_state); | |
1282 } | |
1283 return res; | |
1284 } | |
1285 | |
1201 | 1286 |
1202 // *NOTE*: The overflow list manipulation code here and | 1287 // *NOTE*: The overflow list manipulation code here and |
1203 // in CMSCollector:: are very similar in shape, | 1288 // in CMSCollector:: are very similar in shape, |
1204 // except that in the CMS case we thread the objects | 1289 // except that in the CMS case we thread the objects |
1205 // directly into the list via their mark word, and do | 1290 // directly into the list via their mark word, and do |
1211 // Because of the common code, if you make any changes in | 1296 // Because of the common code, if you make any changes in |
1212 // the code below, please check the CMS version to see if | 1297 // the code below, please check the CMS version to see if |
1213 // similar changes might be needed. | 1298 // similar changes might be needed. |
1214 // See CMSCollector::par_take_from_overflow_list() for | 1299 // See CMSCollector::par_take_from_overflow_list() for |
1215 // more extensive documentation comments. | 1300 // more extensive documentation comments. |
1216 bool | 1301 bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan_state) { |
1217 ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { | |
1218 ObjToScanQueue* work_q = par_scan_state->work_queue(); | 1302 ObjToScanQueue* work_q = par_scan_state->work_queue(); |
1219 assert(work_q->size() == 0, "Should first empty local work queue"); | |
1220 // How many to take? | 1303 // How many to take? |
1221 size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4, | 1304 size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, |
1222 (size_t)ParGCDesiredObjsFromOverflowList); | 1305 (size_t)ParGCDesiredObjsFromOverflowList); |
1223 | 1306 |
1307 assert(par_scan_state->overflow_stack() == NULL, "Error"); | |
1224 if (_overflow_list == NULL) return false; | 1308 if (_overflow_list == NULL) return false; |
1225 | 1309 |
1226 // Otherwise, there was something there; try claiming the list. | 1310 // Otherwise, there was something there; try claiming the list. |
1227 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); | 1311 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); |
1228 // Trim off a prefix of at most objsFromOverflow items | 1312 // Trim off a prefix of at most objsFromOverflow items |