comparison: src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @ 679:cea947c8a988

6819891: ParNew: Fix work queue overflow code to deal correctly with +UseCompressedOops
Summary: When using compressed oops, rather than chaining the overflowed grey objects' pre-images through their klass words, we use GC-worker thread-local overflow stacks.
Reviewed-by: jcoomes, jmasa

author:   ysr
date:     Sat, 28 Mar 2009 15:47:29 -0700
parents:  0fbdb4381b99
children: becb17ad5e51
comparing: 678:4ac7d97e6101 with 679:cea947c8a988
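The change replaces the single, globally shared overflow list (threaded through the from-space objects' klass words) with a per-GC-worker overflow stack whenever UseCompressedOops is set. A minimal standalone sketch of the two shapes, with illustrative names and types rather than the real HotSpot ones:

```cpp
// Minimal sketch of the two overflow-handling shapes this change selects
// between; names and types are illustrative, not HotSpot's.
#include <atomic>
#include <vector>

struct Oop;  // stand-in for HotSpot's oopDesc*

// Without compressed oops: one global list shared by all GC workers,
// chained through a word borrowed from each overflowed object (HotSpot
// reuses the klass word of the from-space copy), so pushes need a CAS.
struct GlobalOverflowList {
  std::atomic<Oop*> _head{nullptr};
  void push(Oop* obj);  // lock-free CAS loop; see push_on_overflow_list below
};

// With compressed oops the low 32 bits of the erstwhile klass word hold an
// object array's "grey boundary" (see the comment block later in the diff),
// so chaining through it is unsafe. Each worker instead keeps a private,
// unsynchronized stack; in the patch this is a C-heap GrowableArray<oop>.
struct WorkerLocalOverflowStack {
  std::vector<Oop*> _stack;  // plays the role of GrowableArray<oop>
  void push(Oop* obj) { _stack.push_back(obj); }  // no atomics needed
};
```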
@@ -34,11 +34,11 @@
   Generation* old_gen_,
   int thread_num_,
   ObjToScanQueueSet* work_queue_set_,
   size_t desired_plab_sz_,
   ParallelTaskTerminator& term_) :
-  _to_space(to_space_), _old_gen(old_gen_), _thread_num(thread_num_),
+  _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_),
   _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
   _ageTable(false), // false ==> not the global age table, no perf data.
   _to_space_alloc_buffer(desired_plab_sz_),
   _to_space_closure(gen_, this), _old_gen_closure(gen_, this),
   _to_space_root_closure(gen_, this), _old_gen_root_closure(gen_, this),
@@ -55,10 +55,15 @@
     (ChunkArray*) old_gen()->get_data_recorder(thread_num());
   _hash_seed = 17;  // Might want to take time-based random value.
   _start = os::elapsedTime();
   _old_gen_closure.set_generation(old_gen_);
   _old_gen_root_closure.set_generation(old_gen_);
+  if (UseCompressedOops) {
+    _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray<oop>(512, true);
+  } else {
+    _overflow_stack = NULL;
+  }
 }
 #ifdef _MSC_VER
 #pragma warning( pop )
 #endif

@@ -79,11 +84,11 @@

 void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) {
   assert(old->is_objArray(), "must be obj array");
   assert(old->is_forwarded(), "must be forwarded");
   assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
-  assert(!_old_gen->is_in(old), "must be in young generation.");
+  assert(!old_gen()->is_in(old), "must be in young generation.");

   objArrayOop obj = objArrayOop(old->forwardee());
   // Process ParGCArrayScanChunk elements now
   // and push the remainder back onto queue
   int start = arrayOop(old)->length();
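For context, scan_partial_array_and_push_remainder relies on HotSpot's incremental object-array scanning: the length field of the from-space copy records how far scanning has progressed, the to-space copy keeps the true length, and each call hands out at most ParGCArrayScanChunk more elements. A simplified sketch of that mechanism under assumed placeholder types (not the real code):

```cpp
// Simplified sketch of chunked ("partial") array scanning; Array, WorkQueue
// and scan_element are assumed placeholders, not HotSpot's types.
const int kChunk = 50;  // stands in for ParGCArrayScanChunk

void scan_partial_array_and_push_remainder(Array* from, Array* to,
                                           WorkQueue* q) {
  int start = from->length();  // grey boundary: elements already handed out
  int end   = to->length();    // true length lives in the to-space copy
  if (end - start > kChunk) {
    end = start + kChunk;      // take one more chunk...
    from->set_length(end);     // ...and advance the grey boundary
    q->push(from);             // remainder goes back on the work queue
  } else {
    from->set_length(end);     // array fully handed out; restore true length
  }
  for (int i = start; i < end; i++) {
    scan_element(to, i);       // scan just this chunk's elements
  }
}
```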
@@ -117,30 +122,72 @@
   }


 void ParScanThreadState::trim_queues(int max_size) {
   ObjToScanQueue* queue = work_queue();
-  while (queue->size() > (juint)max_size) {
-    oop obj_to_scan;
-    if (queue->pop_local(obj_to_scan)) {
-      note_pop();
-
-      if ((HeapWord *)obj_to_scan < young_old_boundary()) {
-        if (obj_to_scan->is_objArray() &&
-            obj_to_scan->is_forwarded() &&
-            obj_to_scan->forwardee() != obj_to_scan) {
-          scan_partial_array_and_push_remainder(obj_to_scan);
-        } else {
-          // object is in to_space
-          obj_to_scan->oop_iterate(&_to_space_closure);
-        }
-      } else {
-        // object is in old generation
-        obj_to_scan->oop_iterate(&_old_gen_closure);
-      }
-    }
-  }
+  do {
+    while (queue->size() > (juint)max_size) {
+      oop obj_to_scan;
+      if (queue->pop_local(obj_to_scan)) {
+        note_pop();
+        if ((HeapWord *)obj_to_scan < young_old_boundary()) {
+          if (obj_to_scan->is_objArray() &&
+              obj_to_scan->is_forwarded() &&
+              obj_to_scan->forwardee() != obj_to_scan) {
+            scan_partial_array_and_push_remainder(obj_to_scan);
+          } else {
+            // object is in to_space
+            obj_to_scan->oop_iterate(&_to_space_closure);
+          }
+        } else {
+          // object is in old generation
+          obj_to_scan->oop_iterate(&_old_gen_closure);
+        }
+      }
+    }
+    // For the case of compressed oops, we have a private, non-shared
+    // overflow stack, so we eagerly drain it so as to more evenly
+    // distribute load early. Note: this may be good to do in
+    // general rather than delay for the final stealing phase.
+    // If applicable, we'll transfer a set of objects over to our
+    // work queue, allowing them to be stolen and draining our
+    // private overflow stack.
+  } while (ParGCTrimOverflow && young_gen()->take_from_overflow_list(this));
+}
+
+bool ParScanThreadState::take_from_overflow_stack() {
+  assert(UseCompressedOops, "Else should not call");
+  assert(young_gen()->overflow_list() == NULL, "Error");
+  ObjToScanQueue* queue = work_queue();
+  GrowableArray<oop>* of_stack = overflow_stack();
+  uint num_overflow_elems = of_stack->length();
+  uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4,
+                                  (juint)ParGCDesiredObjsFromOverflowList),
+                             num_overflow_elems);
+  // Transfer the most recent num_take_elems from the overflow
+  // stack to our work queue.
+  for (size_t i = 0; i != num_take_elems; i++) {
+    oop cur = of_stack->pop();
+    oop obj_to_push = cur->forwardee();
+    assert(Universe::heap()->is_in_reserved(cur), "Should be in heap");
+    assert(!old_gen()->is_in_reserved(cur), "Should be in young gen");
+    assert(Universe::heap()->is_in_reserved(obj_to_push), "Should be in heap");
+    if (should_be_partially_scanned(obj_to_push, cur)) {
+      assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
+      obj_to_push = cur;
+    }
+    bool ok = queue->push(obj_to_push);
+    assert(ok, "Should have succeeded");
+  }
+  assert(young_gen()->overflow_list() == NULL, "Error");
+  return num_take_elems > 0; // was something transferred?
+}
+
+void ParScanThreadState::push_on_overflow_stack(oop p) {
+  assert(UseCompressedOops, "Else should not call");
+  overflow_stack()->push(p);
+  assert(young_gen()->overflow_list() == NULL, "Error");
 }

 HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {

   // Otherwise, if the object is small enough, try to reallocate the
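take_from_overflow_stack caps each transfer three ways: at a quarter of the free space left in the work queue, at ParGCDesiredObjsFromOverflowList, and at whatever the private stack actually holds. A worked example with assumed illustrative values (the flag's default was 20 in this era; queue capacity varies):

```cpp
#include <algorithm>
#include <cstdio>

int main() {
  // Assumed, illustrative values.
  unsigned max_elems      = 16384;  // work queue capacity
  unsigned size           = 1000;   // entries already in the queue
  unsigned desired        = 20;     // ParGCDesiredObjsFromOverflowList
  unsigned overflow_elems = 7;      // objects on the private overflow stack

  // Mirrors the nested MIN2 calls in take_from_overflow_stack.
  unsigned num_take = std::min({(max_elems - size) / 4,  // 3846
                                desired,                 // 20
                                overflow_elems});        // 7
  std::printf("transfer %u objects\n", num_take);        // prints: transfer 7
  return 0;
}
```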
@@ -423,12 +470,11 @@
   // Since this is being done in a separate thread, need new resource
   // and handle marks.
   ResourceMark rm;
   HandleMark hm;
   // We would need multiple old-gen queues otherwise.
-  guarantee(gch->n_gens() == 2,
-     "Par young collection currently only works with one older gen.");
+  assert(gch->n_gens() == 2, "Par young collection currently only works with one older gen.");

   Generation* old_gen = gch->next_gen(_gen);

   ParScanThreadState& par_scan_state = _state_set->thread_sate(i);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
@@ -1167,39 +1213,78 @@
     return false;
   }
 }
 #endif

+// In case we are using compressed oops, we need to be careful.
+// If the object being pushed is an object array, then its length
+// field keeps track of the "grey boundary" at which the next
+// incremental scan will be done (see ParGCArrayScanChunk).
+// When using compressed oops, this length field is kept in the
+// lower 32 bits of the erstwhile klass word and cannot be used
+// for the overflow chaining pointer (OCP below). As such the OCP
+// would itself need to be compressed into the top 32-bits in this
+// case. Unfortunately, see below, in the event that we have a
+// promotion failure, the node to be pushed on the list can be
+// outside of the Java heap, so the heap-based pointer compression
+// would not work (we would have potential aliasing between C-heap
+// and Java-heap pointers). For this reason, when using compressed
+// oops, we simply use a worker-thread-local, non-shared overflow
+// list in the form of a growable array, with a slightly different
+// overflow stack draining strategy. If/when we start using fat
+// stacks here, we can go back to using (fat) pointer chains
+// (although some performance comparisons would be useful since
+// single global lists have their own performance disadvantages
+// as we were made painfully aware not long ago, see 6786503).
 #define BUSY (oop(0x1aff1aff))
 void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
-  // if the object has been forwarded to itself, then we cannot
-  // use the klass pointer for the linked list.  Instead we have
-  // to allocate an oopDesc in the C-Heap and use that for the linked list.
-  // XXX This is horribly inefficient when a promotion failure occurs
-  // and should be fixed. XXX FIX ME !!!
+  assert(is_in_reserved(from_space_obj), "Should be from this generation");
+  if (UseCompressedOops) {
+    // In the case of compressed oops, we use a private, not-shared
+    // overflow stack.
+    par_scan_state->push_on_overflow_stack(from_space_obj);
+  } else {
+    // if the object has been forwarded to itself, then we cannot
+    // use the klass pointer for the linked list.  Instead we have
+    // to allocate an oopDesc in the C-Heap and use that for the linked list.
+    // XXX This is horribly inefficient when a promotion failure occurs
+    // and should be fixed. XXX FIX ME !!!
 #ifndef PRODUCT
-  Atomic::inc_ptr(&_num_par_pushes);
-  assert(_num_par_pushes > 0, "Tautology");
+    Atomic::inc_ptr(&_num_par_pushes);
+    assert(_num_par_pushes > 0, "Tautology");
 #endif
-  if (from_space_obj->forwardee() == from_space_obj) {
-    oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
-    listhead->forward_to(from_space_obj);
-    from_space_obj = listhead;
-  }
-  oop observed_overflow_list = _overflow_list;
-  oop cur_overflow_list;
-  do {
-    cur_overflow_list = observed_overflow_list;
-    if (cur_overflow_list != BUSY) {
-      from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
-    } else {
-      from_space_obj->set_klass_to_list_ptr(NULL);
-    }
-    observed_overflow_list =
-      (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
-  } while (cur_overflow_list != observed_overflow_list);
+    if (from_space_obj->forwardee() == from_space_obj) {
+      oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
+      listhead->forward_to(from_space_obj);
+      from_space_obj = listhead;
+    }
+    oop observed_overflow_list = _overflow_list;
+    oop cur_overflow_list;
+    do {
+      cur_overflow_list = observed_overflow_list;
+      if (cur_overflow_list != BUSY) {
+        from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
+      } else {
+        from_space_obj->set_klass_to_list_ptr(NULL);
+      }
+      observed_overflow_list =
+        (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
+    } while (cur_overflow_list != observed_overflow_list);
+  }
 }

+bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
+  bool res;
+
+  if (UseCompressedOops) {
+    res = par_scan_state->take_from_overflow_stack();
+  } else {
+    res = take_from_overflow_list_work(par_scan_state);
+  }
+  return res;
+}
+

 // *NOTE*: The overflow list manipulation code here and
 // in CMSCollector:: are very similar in shape,
 // except that in the CMS case we thread the objects
 // directly into the list via their mark word, and do
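In the non-compressed path above, pushes stay lock-free: a taker claims the whole list by swapping in the BUSY sentinel, so a pusher that observes BUSY must start a fresh chain rather than link onto it. A standalone sketch of that loop using std::atomic in place of Atomic::cmpxchg_ptr (illustrative, not the HotSpot code):

```cpp
#include <atomic>

struct Node { Node* next; };  // stands in for the klass-word link
static Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);
std::atomic<Node*> overflow_list{nullptr};

void push_on_overflow_list(Node* obj) {
  Node* observed = overflow_list.load();
  Node* cur;
  do {
    cur = observed;
    // Never chain onto the BUSY sentinel: start a fresh list instead.
    obj->next = (cur != BUSY) ? cur : nullptr;
    // On failure, compare_exchange_weak reloads `observed`, matching the
    // retry shape of the Atomic::cmpxchg_ptr loop in the diff.
  } while (!overflow_list.compare_exchange_weak(observed, obj));
}
```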
@@ -1211,18 +1296,17 @@
 // Because of the common code, if you make any changes in
 // the code below, please check the CMS version to see if
 // similar changes might be needed.
 // See CMSCollector::par_take_from_overflow_list() for
 // more extensive documentation comments.
-bool
-ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
+bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan_state) {
   ObjToScanQueue* work_q = par_scan_state->work_queue();
-  assert(work_q->size() == 0, "Should first empty local work queue");
   // How many to take?
-  size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
+  size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                  (size_t)ParGCDesiredObjsFromOverflowList);

+  assert(par_scan_state->overflow_stack() == NULL, "Error");
   if (_overflow_list == NULL) return false;

   // Otherwise, there was something there; try claiming the list.
   oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   // Trim off a prefix of at most objsFromOverflow items
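The hunk is cut off here, but the shape of take_from_overflow_list_work follows the claim-and-split protocol the comments point to in CMSCollector::par_take_from_overflow_list: swap BUSY into the list head to claim everything, detach a bounded prefix for the local queue, and publish the suffix back. A simplified sketch under the same illustrative types as the previous block; the real code additionally handles racing pushers and takers as well as promotion-failure nodes:

```cpp
#include <cstddef>

// Reuses Node, BUSY, and overflow_list from the previous sketch.
bool take_from_overflow_list_work(std::size_t objs_from_overflow) {
  if (overflow_list.load() == nullptr) return false;

  // Claim the entire list by atomically swapping in the BUSY sentinel.
  Node* prefix = overflow_list.exchange(BUSY);
  if (prefix == BUSY || prefix == nullptr) {
    if (prefix == nullptr) {  // we claimed an empty list: restore emptiness
      Node* expected = BUSY;
      overflow_list.compare_exchange_strong(expected, nullptr);
    }
    return false;             // contended or empty; caller may retry
  }

  // Detach a prefix of at most objs_from_overflow items...
  Node* last = prefix;
  for (std::size_t i = 1; i < objs_from_overflow && last->next != nullptr; i++) {
    last = last->next;
  }
  Node* suffix = last->next;
  last->next = nullptr;

  // ...and publish the remainder back for other workers to take.
  Node* expected = BUSY;
  overflow_list.compare_exchange_strong(expected, suffix);

  // The prefix would now be walked and its objects pushed onto the
  // caller's work queue (elided in this sketch).
  return true;
}
```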