src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @ 534:5cfd8d19e546

6786503: Overflow list performance can be improved
Summary: Avoid overflow list walk in CMS & ParNew when it is unnecessary. Fix a couple of correctness issues, including a C-heap leak, in ParNew at the intersection of promotion failure, work queue overflow and object array chunking. Add stress testing option and related assertion checking.
Reviewed-by: jmasa
author ysr
date Mon, 26 Jan 2009 12:47:21 -0800
parents 7d7a7c599c17
children 0fbdb4381b99
@@ -402,10 +402,12 @@
     // Otherwise, offer termination.
     par_scan_state()->start_term_time();
     if (terminator()->offer_termination()) break;
     par_scan_state()->end_term_time();
   }
+  assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
+         "Broken overflow list?");
   // Finish the last termination pause.
   par_scan_state()->end_term_time();
 }
 
 ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* next_gen,
@@ -454,10 +456,12 @@
   : DefNewGeneration(rs, initial_byte_size, level, "PCopy"),
     _overflow_list(NULL),
     _is_alive_closure(this),
     _plab_stats(YoungPLABSize, PLABWeight)
 {
+  NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
+  NOT_PRODUCT(_num_par_pushes = 0;)
   _task_queues = new ObjToScanQueueSet(ParallelGCThreads);
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 
   for (uint i1 = 0; i1 < ParallelGCThreads; i1++) {
     ObjToScanQueuePadded *q_padded = new ObjToScanQueuePadded();
@@ -991,16 +995,23 @@
     obj_to_push = old;
     assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push,
            "push forwarded object");
   }
   // Push it on one of the queues of to-be-scanned objects.
-  if (!par_scan_state->work_queue()->push(obj_to_push)) {
+  bool simulate_overflow = false;
+  NOT_PRODUCT(
+    if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+      // simulate a stack overflow
+      simulate_overflow = true;
+    }
+  )
+  if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
     // Add stats for overflow pushes.
     if (Verbose && PrintGCDetails) {
       gclog_or_tty->print("queue overflow!\n");
     }
-    push_on_overflow_list(old);
+    push_on_overflow_list(old, par_scan_state);
     par_scan_state->note_overflow_push();
   }
   par_scan_state->note_push();
 
   return new_obj;
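Both push sites in this change share one pattern: in debug builds, a countdown (should_simulate_overflow(), defined further down) periodically pretends the work-queue push failed, so the overflow path is exercised even in runs where the queues never actually fill. A standalone sketch of that fault-injection pattern, with kOverflowALot and kInterval as illustrative stand-ins for the ParGCWorkQueueOverflowALot and ParGCWorkQueueOverflowInterval flags:

#include <atomic>

// Illustrative stand-ins for the HotSpot flags; values are made up.
static const bool kOverflowALot = true;
static const int  kInterval     = 512;

static std::atomic<int> overflow_counter{kInterval};

// Fires roughly once every kInterval calls. Like the HotSpot version,
// it tolerates races: concurrent callers may produce a few closely
// spaced simulated overflows, which is harmless (even useful) for a
// stress test.
static bool should_simulate_overflow() {
  if (overflow_counter.fetch_sub(1, std::memory_order_relaxed) <= 0) {
    overflow_counter.store(kInterval, std::memory_order_relaxed);
    return true;
  }
  return false;
}

// Shape of the push sites above: divert to the overflow path either on
// a real queue-full failure or on a simulated one. Queue::push is
// assumed to return false when full.
template <typename Queue, typename T>
bool push_or_overflow(Queue& q, const T& item) {
  bool simulate = kOverflowALot && should_simulate_overflow();
  return !simulate && q.push(item);   // false => take the overflow path
}

The real code uses an ordinary racy decrement rather than an atomic; as its comment notes, a few extra closely spaced simulated overflows under contention are harmless for a stress test.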
@@ -1108,13 +1119,20 @@
     obj_to_push = old;
     assert(obj_to_push->is_forwarded() && obj_to_push->forwardee() != obj_to_push,
            "push forwarded object");
   }
   // Push it on one of the queues of to-be-scanned objects.
-  if (!par_scan_state->work_queue()->push(obj_to_push)) {
+  bool simulate_overflow = false;
+  NOT_PRODUCT(
+    if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+      // simulate a stack overflow
+      simulate_overflow = true;
+    }
+  )
+  if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
     // Add stats for overflow pushes.
-    push_on_overflow_list(old);
+    push_on_overflow_list(old, par_scan_state);
     par_scan_state->note_overflow_push();
   }
   par_scan_state->note_push();
 
   return new_obj;
@@ -1133,24 +1151,50 @@
   }
 
   return forward_ptr;
 }
 
-void ParNewGeneration::push_on_overflow_list(oop from_space_obj) {
-  oop cur_overflow_list = _overflow_list;
+#ifndef PRODUCT
+// It's OK to call this multi-threaded; the worst thing
+// that can happen is that we'll get a bunch of closely
+// spaced simulated overflows, but that's OK, in fact
+// probably good as it would exercise the overflow code
+// under contention.
+bool ParNewGeneration::should_simulate_overflow() {
+  if (_overflow_counter-- <= 0) { // just being defensive
+    _overflow_counter = ParGCWorkQueueOverflowInterval;
+    return true;
+  } else {
+    return false;
+  }
+}
+#endif
+
+#define BUSY (oop(0x1aff1aff))
+void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
   // if the object has been forwarded to itself, then we cannot
   // use the klass pointer for the linked list. Instead we have
   // to allocate an oopDesc in the C-Heap and use that for the linked list.
+  // XXX This is horribly inefficient when a promotion failure occurs
+  // and should be fixed. XXX FIX ME !!!
+#ifndef PRODUCT
+  Atomic::inc_ptr(&_num_par_pushes);
+  assert(_num_par_pushes > 0, "Tautology");
+#endif
   if (from_space_obj->forwardee() == from_space_obj) {
     oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
     listhead->forward_to(from_space_obj);
    from_space_obj = listhead;
   }
-  while (true) {
-    from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
-    oop observed_overflow_list =
-      (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
-    if (observed_overflow_list == cur_overflow_list) break;
-    // Otherwise...
-    cur_overflow_list = observed_overflow_list;
-  }
-}
+  oop observed_overflow_list = _overflow_list;
+  oop cur_overflow_list;
+  do {
+    cur_overflow_list = observed_overflow_list;
+    if (cur_overflow_list != BUSY) {
+      from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
+    } else {
+      from_space_obj->set_klass_to_list_ptr(NULL);
+    }
+    observed_overflow_list =
+      (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
+  } while (cur_overflow_list != observed_overflow_list);
+}
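The rewritten push_on_overflow_list threads each object onto a global lock-free stack through its klass word, with one twist over a plain Treiber stack: a concurrent taker may have parked the BUSY sentinel in _overflow_list while claiming the whole chain, and a pusher must never link a node to that sentinel. A sketch of the protocol on an ordinary pointer-based stack (Node, std::atomic, and this push_on_overflow_list are illustrative stand-ins for oops and Atomic::cmpxchg_ptr, not the HotSpot types):

#include <atomic>

// Each entry threads onto the global list through its own storage,
// the way ParNew reuses an object's klass word as the link field.
struct Node { Node* next; };

// Sentinel a taker parks in the head while claiming the whole chain;
// a pusher must never link a node to it.
static Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);
static std::atomic<Node*> overflow_list{nullptr};

// Treiber-stack push, except an observed BUSY is treated as an empty
// list: we link to nullptr, and a successful CAS replaces BUSY itself,
// handing the (new, one-element) list back to everyone.
void push_on_overflow_list(Node* n) {
  Node* observed = overflow_list.load();
  Node* cur;
  do {
    cur = observed;
    n->next = (cur == BUSY) ? nullptr : cur;
  } while (!overflow_list.compare_exchange_weak(observed, n));
}

Linking to NULL when BUSY is observed means a successful CAS atomically replaces the sentinel, so the pusher both starts a fresh list and reopens it to takers in a single step.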
@@ -1157,69 +1201,144 @@
 
+// *NOTE*: The overflow list manipulation code here and
+// in CMSCollector:: are very similar in shape,
+// except that in the CMS case we thread the objects
+// directly into the list via their mark word, and do
+// not need to deal with special cases below related
+// to chunking of object arrays and promotion failure
+// handling.
+// CR 6797058 has been filed to attempt consolidation of
+// the common code.
+// Because of the common code, if you make any changes in
+// the code below, please check the CMS version to see if
+// similar changes might be needed.
+// See CMSCollector::par_take_from_overflow_list() for
+// more extensive documentation comments.
 bool
 ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
   ObjToScanQueue* work_q = par_scan_state->work_queue();
+  assert(work_q->size() == 0, "Should first empty local work queue");
   // How many to take?
-  int objsFromOverflow = MIN2(work_q->max_elems()/4,
-                              (juint)ParGCDesiredObjsFromOverflowList);
+  size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
+                                 (size_t)ParGCDesiredObjsFromOverflowList);
 
   if (_overflow_list == NULL) return false;
 
   // Otherwise, there was something there; try claiming the list.
-  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
-
-  if (prefix == NULL) {
-    return false;
-  }
-  // Trim off a prefix of at most objsFromOverflow items
-  int i = 1;
+  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+  // Trim off a prefix of at most objsFromOverflow items
+  Thread* tid = Thread::current();
+  size_t spin_count = (size_t)ParallelGCThreads;
+  size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
+  for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
+    // someone grabbed it before we did ...
+    // ... we spin for a short while...
+    os::sleep(tid, sleep_time_millis, false);
+    if (_overflow_list == NULL) {
+      // nothing left to take
+      return false;
+    } else if (_overflow_list != BUSY) {
+      // try and grab the prefix
+      prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+    }
+  }
+  if (prefix == NULL || prefix == BUSY) {
+    // Nothing to take or waited long enough
+    if (prefix == NULL) {
+      // Write back the NULL in case we overwrote it with BUSY above
+      // and it is still the same value.
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+    }
+    return false;
+  }
+  assert(prefix != NULL && prefix != BUSY, "Error");
+  size_t i = 1;
   oop cur = prefix;
   while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
     i++; cur = oop(cur->klass());
   }
 
   // Reattach remaining (suffix) to overflow list
-  if (cur->klass_or_null() != NULL) {
-    oop suffix = oop(cur->klass());
-    cur->set_klass_to_list_ptr(NULL);
-
-    // Find last item of suffix list
-    oop last = suffix;
-    while (last->klass_or_null() != NULL) {
-      last = oop(last->klass());
-    }
-    // Atomically prepend suffix to current overflow list
-    oop cur_overflow_list = _overflow_list;
-    while (true) {
-      last->set_klass_to_list_ptr(cur_overflow_list);
-      oop observed_overflow_list =
-        (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
-      if (observed_overflow_list == cur_overflow_list) break;
-      // Otherwise...
-      cur_overflow_list = observed_overflow_list;
+  if (cur->klass_or_null() == NULL) {
+    // Write back the NULL in lieu of the BUSY we wrote
+    // above, if it is still the same value.
+    if (_overflow_list == BUSY) {
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+    }
+  } else {
+    assert(cur->klass_or_null() != BUSY, "Error");
+    oop suffix = oop(cur->klass());   // suffix will be put back on global list
+    cur->set_klass_to_list_ptr(NULL); // break off suffix
+    // It's possible that the list is still in the empty(busy) state
+    // we left it in a short while ago; in that case we may be
+    // able to place back the suffix.
+    oop observed_overflow_list = _overflow_list;
+    oop cur_overflow_list = observed_overflow_list;
+    bool attached = false;
+    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
+      observed_overflow_list =
+        (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      if (cur_overflow_list == observed_overflow_list) {
+        attached = true;
+        break;
+      } else cur_overflow_list = observed_overflow_list;
+    }
+    if (!attached) {
+      // Too bad, someone else got in in between; we'll need to do a splice.
+      // Find the last item of suffix list
+      oop last = suffix;
+      while (last->klass_or_null() != NULL) {
+        last = oop(last->klass());
+      }
+      // Atomically prepend suffix to current overflow list
+      observed_overflow_list = _overflow_list;
+      do {
+        cur_overflow_list = observed_overflow_list;
+        if (cur_overflow_list != BUSY) {
+          // Do the splice ...
+          last->set_klass_to_list_ptr(cur_overflow_list);
+        } else { // cur_overflow_list == BUSY
+          last->set_klass_to_list_ptr(NULL);
+        }
+        observed_overflow_list =
+          (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      } while (cur_overflow_list != observed_overflow_list);
     }
   }
 
   // Push objects on prefix list onto this thread's work queue
-  assert(cur != NULL, "program logic");
+  assert(prefix != NULL && prefix != BUSY, "program logic");
   cur = prefix;
-  int n = 0;
+  ssize_t n = 0;
   while (cur != NULL) {
     oop obj_to_push = cur->forwardee();
     oop next = oop(cur->klass_or_null());
     cur->set_klass(obj_to_push->klass());
-    if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
-      obj_to_push = cur;
-      assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
-    }
-    work_q->push(obj_to_push);
+    // This may be an array object that is self-forwarded. In that case, the list pointer
+    // space, cur, is not in the Java heap, but rather in the C-heap and should be freed.
+    if (!is_in_reserved(cur)) {
+      // This can become a scaling bottleneck when there is work queue overflow coincident
+      // with promotion failure.
+      oopDesc* f = cur;
+      FREE_C_HEAP_ARRAY(oopDesc, f);
+    } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
+      assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
+      obj_to_push = cur;
+    }
+    bool ok = work_q->push(obj_to_push);
+    assert(ok, "Should have succeeded");
     cur = next;
     n++;
   }
   par_scan_state->note_overflow_refill(n);
+#ifndef PRODUCT
+  assert(_num_par_pushes >= n, "Too many pops?");
+  Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
+#endif
   return true;
 }
+#undef BUSY
 
 void ParNewGeneration::ref_processor_init()
 {
   if (_ref_processor == NULL) {
     // Allocate and initialize a reference processor
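For reference, the taker side of the same protocol, sketched on the same illustrative Node stack rather than the real oop/Atomic machinery; it is simplified, omitting what take_from_overflow_list above additionally does: freeing the C-heap list nodes created for self-forwarded objects (the leak fix), the array-chunking check, and the note_overflow_refill/_num_par_pushes bookkeeping. The spin bound and sleep interval are likewise made-up values.

#include <atomic>
#include <chrono>
#include <cstddef>
#include <thread>

struct Node { Node* next; };
static Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);
static std::atomic<Node*> overflow_list{nullptr};

// Claim the whole chain by swapping in BUSY, detach a bounded prefix
// for the local work queue, and put the remainder back. Sleep briefly
// and retry if another taker holds the list, instead of contending.
Node* take_prefix(std::size_t want) {
  if (overflow_list.load() == nullptr) return nullptr;
  Node* prefix = overflow_list.exchange(BUSY);
  for (int spin = 0; prefix == BUSY && spin < 8; spin++) {
    // Someone else is holding the list; wait a little and retry.
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
    Node* observed = overflow_list.load();
    if (observed == nullptr) return nullptr;             // drained meanwhile
    if (observed != BUSY) prefix = overflow_list.exchange(BUSY);
  }
  if (prefix == nullptr || prefix == BUSY) {
    if (prefix == nullptr) {
      // We overwrote NULL with BUSY and took nothing: restore NULL.
      Node* expected = BUSY;
      overflow_list.compare_exchange_strong(expected, nullptr);
    }
    return nullptr;
  }
  // Trim off at most `want` nodes as our prefix.
  Node* cur = prefix;
  for (std::size_t i = 1; i < want && cur->next != nullptr; i++) cur = cur->next;
  Node* suffix = cur->next;
  cur->next = nullptr;
  if (suffix != nullptr) {
    // Fast path: the head is likely still our BUSY; drop the suffix back in.
    Node* expected = BUSY;
    if (!overflow_list.compare_exchange_strong(expected, suffix)) {
      // Slow path: others pushed meanwhile; splice the suffix in front.
      Node* last = suffix;
      while (last->next != nullptr) last = last->next;
      Node* observed = overflow_list.load();
      Node* head;
      do {
        head = observed;
        last->next = (head == BUSY) ? nullptr : head;
      } while (!overflow_list.compare_exchange_weak(observed, suffix));
    }
  }
  return prefix;   // caller pushes these onto its local work queue
}

Parking BUSY lets a taker claim the entire chain with one xchg_ptr instead of walking it under contention, which is the performance point of the change, while writing NULL back whenever nothing was taken keeps the cheap _overflow_list == NULL fast path truthful for pushers and other takers alike.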