Mercurial > hg > graal-compiler
comparison src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @ 534:5cfd8d19e546
6786503: Overflow list performance can be improved
Summary: Avoid overflow list walk in CMS & ParNew when it is unnecessary. Fix a couple of correctness issues, including a C-heap leak, in ParNew at the intersection of promotion failure, work queue overflow and object array chunking. Add stress testing option and related assertion checking.
Reviewed-by: jmasa
author | ysr |
---|---|
date | Mon, 26 Jan 2009 12:47:21 -0800 |
parents | 7d7a7c599c17 |
children | 0fbdb4381b99 |
comparison
equal
deleted
inserted
replaced
527:2b1de1db9a9d | 534:5cfd8d19e546 |
---|---|
402 // Otherwise, offer termination. | 402 // Otherwise, offer termination. |
403 par_scan_state()->start_term_time(); | 403 par_scan_state()->start_term_time(); |
404 if (terminator()->offer_termination()) break; | 404 if (terminator()->offer_termination()) break; |
405 par_scan_state()->end_term_time(); | 405 par_scan_state()->end_term_time(); |
406 } | 406 } |
407 assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0, | |
408 "Broken overflow list?"); | |
407 // Finish the last termination pause. | 409 // Finish the last termination pause. |
408 par_scan_state()->end_term_time(); | 410 par_scan_state()->end_term_time(); |
409 } | 411 } |
410 | 412 |
411 ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* next_gen, | 413 ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* next_gen, |
454 : DefNewGeneration(rs, initial_byte_size, level, "PCopy"), | 456 : DefNewGeneration(rs, initial_byte_size, level, "PCopy"), |
455 _overflow_list(NULL), | 457 _overflow_list(NULL), |
456 _is_alive_closure(this), | 458 _is_alive_closure(this), |
457 _plab_stats(YoungPLABSize, PLABWeight) | 459 _plab_stats(YoungPLABSize, PLABWeight) |
458 { | 460 { |
461 NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;) | |
462 NOT_PRODUCT(_num_par_pushes = 0;) | |
459 _task_queues = new ObjToScanQueueSet(ParallelGCThreads); | 463 _task_queues = new ObjToScanQueueSet(ParallelGCThreads); |
460 guarantee(_task_queues != NULL, "task_queues allocation failure."); | 464 guarantee(_task_queues != NULL, "task_queues allocation failure."); |
461 | 465 |
462 for (uint i1 = 0; i1 < ParallelGCThreads; i1++) { | 466 for (uint i1 = 0; i1 < ParallelGCThreads; i1++) { |
463 ObjToScanQueuePadded *q_padded = new ObjToScanQueuePadded(); | 467 ObjToScanQueuePadded *q_padded = new ObjToScanQueuePadded(); |
991 obj_to_push = old; | 995 obj_to_push = old; |
992 assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push, | 996 assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push, |
993 "push forwarded object"); | 997 "push forwarded object"); |
994 } | 998 } |
995 // Push it on one of the queues of to-be-scanned objects. | 999 // Push it on one of the queues of to-be-scanned objects. |
996 if (!par_scan_state->work_queue()->push(obj_to_push)) { | 1000 bool simulate_overflow = false; |
1001 NOT_PRODUCT( | |
1002 if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) { | |
1003 // simulate a stack overflow | |
1004 simulate_overflow = true; | |
1005 } | |
1006 ) | |
1007 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { | |
997 // Add stats for overflow pushes. | 1008 // Add stats for overflow pushes. |
998 if (Verbose && PrintGCDetails) { | 1009 if (Verbose && PrintGCDetails) { |
999 gclog_or_tty->print("queue overflow!\n"); | 1010 gclog_or_tty->print("queue overflow!\n"); |
1000 } | 1011 } |
1001 push_on_overflow_list(old); | 1012 push_on_overflow_list(old, par_scan_state); |
1002 par_scan_state->note_overflow_push(); | 1013 par_scan_state->note_overflow_push(); |
1003 } | 1014 } |
1004 par_scan_state->note_push(); | 1015 par_scan_state->note_push(); |
1005 | 1016 |
1006 return new_obj; | 1017 return new_obj; |
1108 obj_to_push = old; | 1119 obj_to_push = old; |
1109 assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push, | 1120 assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push, |
1110 "push forwarded object"); | 1121 "push forwarded object"); |
1111 } | 1122 } |
1112 // Push it on one of the queues of to-be-scanned objects. | 1123 // Push it on one of the queues of to-be-scanned objects. |
1113 if (!par_scan_state->work_queue()->push(obj_to_push)) { | 1124 bool simulate_overflow = false; |
1125 NOT_PRODUCT( | |
1126 if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) { | |
1127 // simulate a stack overflow | |
1128 simulate_overflow = true; | |
1129 } | |
1130 ) | |
1131 if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) { | |
1114 // Add stats for overflow pushes. | 1132 // Add stats for overflow pushes. |
1115 push_on_overflow_list(old); | 1133 push_on_overflow_list(old, par_scan_state); |
1116 par_scan_state->note_overflow_push(); | 1134 par_scan_state->note_overflow_push(); |
1117 } | 1135 } |
1118 par_scan_state->note_push(); | 1136 par_scan_state->note_push(); |
1119 | 1137 |
1120 return new_obj; | 1138 return new_obj; |
1133 } | 1151 } |
1134 | 1152 |
1135 return forward_ptr; | 1153 return forward_ptr; |
1136 } | 1154 } |
1137 | 1155 |
1138 void ParNewGeneration::push_on_overflow_list(oop from_space_obj) { | 1156 #ifndef PRODUCT |
1139 oop cur_overflow_list = _overflow_list; | 1157 // It's OK to call this multi-threaded; the worst thing |
1158 // that can happen is that we'll get a bunch of closely | |
1159 // spaced simulated oveflows, but that's OK, in fact | |
1160 // probably good as it would exercise the overflow code | |
1161 // under contention. | |
1162 bool ParNewGeneration::should_simulate_overflow() { | |
1163 if (_overflow_counter-- <= 0) { // just being defensive | |
1164 _overflow_counter = ParGCWorkQueueOverflowInterval; | |
1165 return true; | |
1166 } else { | |
1167 return false; | |
1168 } | |
1169 } | |
1170 #endif | |
1171 | |
1172 #define BUSY (oop(0x1aff1aff)) | |
1173 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { | |
1140 // if the object has been forwarded to itself, then we cannot | 1174 // if the object has been forwarded to itself, then we cannot |
1141 // use the klass pointer for the linked list. Instead we have | 1175 // use the klass pointer for the linked list. Instead we have |
1142 // to allocate an oopDesc in the C-Heap and use that for the linked list. | 1176 // to allocate an oopDesc in the C-Heap and use that for the linked list. |
1177 // XXX This is horribly inefficient when a promotion failure occurs | |
1178 // and should be fixed. XXX FIX ME !!! | |
1179 #ifndef PRODUCT | |
1180 Atomic::inc_ptr(&_num_par_pushes); | |
1181 assert(_num_par_pushes > 0, "Tautology"); | |
1182 #endif | |
1143 if (from_space_obj->forwardee() == from_space_obj) { | 1183 if (from_space_obj->forwardee() == from_space_obj) { |
1144 oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1); | 1184 oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1); |
1145 listhead->forward_to(from_space_obj); | 1185 listhead->forward_to(from_space_obj); |
1146 from_space_obj = listhead; | 1186 from_space_obj = listhead; |
1147 } | 1187 } |
1148 while (true) { | 1188 oop observed_overflow_list = _overflow_list; |
1149 from_space_obj->set_klass_to_list_ptr(cur_overflow_list); | 1189 oop cur_overflow_list; |
1150 oop observed_overflow_list = | 1190 do { |
1191 cur_overflow_list = observed_overflow_list; | |
1192 if (cur_overflow_list != BUSY) { | |
1193 from_space_obj->set_klass_to_list_ptr(cur_overflow_list); | |
1194 } else { | |
1195 from_space_obj->set_klass_to_list_ptr(NULL); | |
1196 } | |
1197 observed_overflow_list = | |
1151 (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list); | 1198 (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list); |
1152 if (observed_overflow_list == cur_overflow_list) break; | 1199 } while (cur_overflow_list != observed_overflow_list); |
1153 // Otherwise... | 1200 } |
1154 cur_overflow_list = observed_overflow_list; | 1201 |
1155 } | 1202 // *NOTE*: The overflow list manipulation code here and |
1156 } | 1203 // in CMSCollector:: are very similar in shape, |
1157 | 1204 // except that in the CMS case we thread the objects |
1205 // directly into the list via their mark word, and do | |
1206 // not need to deal with special cases below related | |
1207 // to chunking of object arrays and promotion failure | |
1208 // handling. | |
1209 // CR 6797058 has been filed to attempt consolidation of | |
1210 // the common code. | |
1211 // Because of the common code, if you make any changes in | |
1212 // the code below, please check the CMS version to see if | |
1213 // similar changes might be needed. | |
1214 // See CMSCollector::par_take_from_overflow_list() for | |
1215 // more extensive documentation comments. | |
1158 bool | 1216 bool |
1159 ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { | 1217 ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { |
1160 ObjToScanQueue* work_q = par_scan_state->work_queue(); | 1218 ObjToScanQueue* work_q = par_scan_state->work_queue(); |
1219 assert(work_q->size() == 0, "Should first empty local work queue"); | |
1161 // How many to take? | 1220 // How many to take? |
1162 int objsFromOverflow = MIN2(work_q->max_elems()/4, | 1221 size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4, |
1163 (juint)ParGCDesiredObjsFromOverflowList); | 1222 (size_t)ParGCDesiredObjsFromOverflowList); |
1164 | 1223 |
1165 if (_overflow_list == NULL) return false; | 1224 if (_overflow_list == NULL) return false; |
1166 | 1225 |
1167 // Otherwise, there was something there; try claiming the list. | 1226 // Otherwise, there was something there; try claiming the list. |
1168 oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list); | 1227 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); |
1169 | |
1170 if (prefix == NULL) { | |
1171 return false; | |
1172 } | |
1173 // Trim off a prefix of at most objsFromOverflow items | 1228 // Trim off a prefix of at most objsFromOverflow items |
1174 int i = 1; | 1229 Thread* tid = Thread::current(); |
1230 size_t spin_count = (size_t)ParallelGCThreads; | |
1231 size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100); | |
1232 for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) { | |
1233 // someone grabbed it before we did ... | |
1234 // ... we spin for a short while... | |
1235 os::sleep(tid, sleep_time_millis, false); | |
1236 if (_overflow_list == NULL) { | |
1237 // nothing left to take | |
1238 return false; | |
1239 } else if (_overflow_list != BUSY) { | |
1240 // try and grab the prefix | |
1241 prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); | |
1242 } | |
1243 } | |
1244 if (prefix == NULL || prefix == BUSY) { | |
1245 // Nothing to take or waited long enough | |
1246 if (prefix == NULL) { | |
1247 // Write back the NULL in case we overwrote it with BUSY above | |
1248 // and it is still the same value. | |
1249 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); | |
1250 } | |
1251 return false; | |
1252 } | |
1253 assert(prefix != NULL && prefix != BUSY, "Error"); | |
1254 size_t i = 1; | |
1175 oop cur = prefix; | 1255 oop cur = prefix; |
1176 while (i < objsFromOverflow && cur->klass_or_null() != NULL) { | 1256 while (i < objsFromOverflow && cur->klass_or_null() != NULL) { |
1177 i++; cur = oop(cur->klass()); | 1257 i++; cur = oop(cur->klass()); |
1178 } | 1258 } |
1179 | 1259 |
1180 // Reattach remaining (suffix) to overflow list | 1260 // Reattach remaining (suffix) to overflow list |
1181 if (cur->klass_or_null() != NULL) { | 1261 if (cur->klass_or_null() == NULL) { |
1182 oop suffix = oop(cur->klass()); | 1262 // Write back the NULL in lieu of the BUSY we wrote |
1183 cur->set_klass_to_list_ptr(NULL); | 1263 // above and it is still the same value. |
1184 | 1264 if (_overflow_list == BUSY) { |
1185 // Find last item of suffix list | 1265 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY); |
1186 oop last = suffix; | 1266 } |
1187 while (last->klass_or_null() != NULL) { | 1267 } else { |
1188 last = oop(last->klass()); | 1268 assert(cur->klass_or_null() != BUSY, "Error"); |
1189 } | 1269 oop suffix = oop(cur->klass()); // suffix will be put back on global list |
1190 // Atomically prepend suffix to current overflow list | 1270 cur->set_klass_to_list_ptr(NULL); // break off suffix |
1191 oop cur_overflow_list = _overflow_list; | 1271 // It's possible that the list is still in the empty(busy) state |
1192 while (true) { | 1272 // we left it in a short while ago; in that case we may be |
1193 last->set_klass_to_list_ptr(cur_overflow_list); | 1273 // able to place back the suffix. |
1194 oop observed_overflow_list = | 1274 oop observed_overflow_list = _overflow_list; |
1195 (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list); | 1275 oop cur_overflow_list = observed_overflow_list; |
1196 if (observed_overflow_list == cur_overflow_list) break; | 1276 bool attached = false; |
1197 // Otherwise... | 1277 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) { |
1198 cur_overflow_list = observed_overflow_list; | 1278 observed_overflow_list = |
1279 (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list); | |
1280 if (cur_overflow_list == observed_overflow_list) { | |
1281 attached = true; | |
1282 break; | |
1283 } else cur_overflow_list = observed_overflow_list; | |
1284 } | |
1285 if (!attached) { | |
1286 // Too bad, someone else got in in between; we'll need to do a splice. | |
1287 // Find the last item of suffix list | |
1288 oop last = suffix; | |
1289 while (last->klass_or_null() != NULL) { | |
1290 last = oop(last->klass()); | |
1291 } | |
1292 // Atomically prepend suffix to current overflow list | |
1293 observed_overflow_list = _overflow_list; | |
1294 do { | |
1295 cur_overflow_list = observed_overflow_list; | |
1296 if (cur_overflow_list != BUSY) { | |
1297 // Do the splice ... | |
1298 last->set_klass_to_list_ptr(cur_overflow_list); | |
1299 } else { // cur_overflow_list == BUSY | |
1300 last->set_klass_to_list_ptr(NULL); | |
1301 } | |
1302 observed_overflow_list = | |
1303 (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list); | |
1304 } while (cur_overflow_list != observed_overflow_list); | |
1199 } | 1305 } |
1200 } | 1306 } |
1201 | 1307 |
1202 // Push objects on prefix list onto this thread's work queue | 1308 // Push objects on prefix list onto this thread's work queue |
1203 assert(cur != NULL, "program logic"); | 1309 assert(prefix != NULL && prefix != BUSY, "program logic"); |
1204 cur = prefix; | 1310 cur = prefix; |
1205 int n = 0; | 1311 ssize_t n = 0; |
1206 while (cur != NULL) { | 1312 while (cur != NULL) { |
1207 oop obj_to_push = cur->forwardee(); | 1313 oop obj_to_push = cur->forwardee(); |
1208 oop next = oop(cur->klass_or_null()); | 1314 oop next = oop(cur->klass_or_null()); |
1209 cur->set_klass(obj_to_push->klass()); | 1315 cur->set_klass(obj_to_push->klass()); |
1210 if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) { | 1316 // This may be an array object that is self-forwarded. In that case, the list pointer |
1317 // space, cur, is not in the Java heap, but rather in the C-heap and should be freed. | |
1318 if (!is_in_reserved(cur)) { | |
1319 // This can become a scaling bottleneck when there is work queue overflow coincident | |
1320 // with promotion failure. | |
1321 oopDesc* f = cur; | |
1322 FREE_C_HEAP_ARRAY(oopDesc, f); | |
1323 } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) { | |
1324 assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned"); | |
1211 obj_to_push = cur; | 1325 obj_to_push = cur; |
1212 assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned"); | 1326 } |
1213 } | 1327 bool ok = work_q->push(obj_to_push); |
1214 work_q->push(obj_to_push); | 1328 assert(ok, "Should have succeeded"); |
1215 cur = next; | 1329 cur = next; |
1216 n++; | 1330 n++; |
1217 } | 1331 } |
1218 par_scan_state->note_overflow_refill(n); | 1332 par_scan_state->note_overflow_refill(n); |
1333 #ifndef PRODUCT | |
1334 assert(_num_par_pushes >= n, "Too many pops?"); | |
1335 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes); | |
1336 #endif | |
1219 return true; | 1337 return true; |
1220 } | 1338 } |
1339 #undef BUSY | |
1221 | 1340 |
1222 void ParNewGeneration::ref_processor_init() | 1341 void ParNewGeneration::ref_processor_init() |
1223 { | 1342 { |
1224 if (_ref_processor == NULL) { | 1343 if (_ref_processor == NULL) { |
1225 // Allocate and initialize a reference processor | 1344 // Allocate and initialize a reference processor |