comparison src/share/vm/gc_implementation/g1/sparsePRT.hpp @ 2173:97ba643ea3ed

7014261: G1: RSet-related failures
Summary: A race between the concurrent cleanup thread and the VM thread, while the latter is processing the "expanded sparse table list", causes both threads to try to free the same sparse table entry; this either makes one of the threads fail or leaves the entry in an inconsistent state. The solution is to purge, from the expanded list, all entries that correspond to regions that are being cleaned up.
Reviewed-by: brutisso, johnc
author tonyp
date Tue, 25 Jan 2011 17:58:19 -0500
parents f95d63e2154a
children d2a62e0f25eb
comparing 2172:a672e43650cc with 2173:97ba643ea3ed
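
Before the diff itself, a rough sketch of the approach the summary describes may help. It is a minimal, single-threaded illustration under invented names (RegionInfo, rebuild_expanded_list and so on are not HotSpot code): the expanded list is not pruned in place, it is abandoned and rebuilt from the regions, so an entry belonging to a region that cleanup is freeing simply never reappears, and there is nothing left for two threads to free twice.

#include <cstddef>
#include <cstdio>
#include <vector>

// Invented, simplified types: one record per heap region, with the "list"
// kept as indices so the whole example stays in one file.
struct RegionInfo {
  int  id;
  bool freed_by_cleanup;    // cleanup is reclaiming this region
  bool table_was_expanded;  // its sparse table was on the expanded list
  int  next_on_list;        // index of the next list entry, -1 terminates
};

// Rebuild the expanded list from the region table. Entries for freed regions
// are simply never re-added, so no thread has to unlink or free them in place.
int rebuild_expanded_list(std::vector<RegionInfo>& regions) {
  int head = -1;  // the old list head is conceptually dropped at this point
  for (std::size_t i = 0; i < regions.size(); ++i) {
    RegionInfo& r = regions[i];
    if (r.table_was_expanded && !r.freed_by_cleanup) {
      r.next_on_list = head;            // push the surviving entry
      head = static_cast<int>(i);
    }
  }
  return head;
}

int main() {
  std::vector<RegionInfo> regions = {
    {0, false, true,  -1},   // stays
    {1, true,  true,  -1},   // freed by cleanup: purged from the list
    {2, false, false, -1},   // never expanded: not on the list at all
    {3, false, true,  -1}    // stays
  };
  for (int i = rebuild_expanded_list(regions); i != -1; i = regions[i].next_on_list) {
    std::printf("region %d remains on the expanded list\n", regions[i].id);
  }
  return 0;
}

The actual change spreads this rebuild across the cleanup workers; the diff below declares the pieces, and a fuller sketch of how they fit together follows the diff.
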
--- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp
+++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp
@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
@@ -210,12 +210,15 @@
 
 // Concurrent accesss to a SparsePRT must be serialized by some external
 // mutex.
 
 class SparsePRTIter;
+class SparsePRTCleanupTask;
 
 class SparsePRT VALUE_OBJ_CLASS_SPEC {
+  friend class SparsePRTCleanupTask;
+
   // Iterations are done on the _cur hash table, since they only need to
   // see entries visible at the start of a collection pause.
   // All other operations are done using the _next hash table.
   RSHashTable* _cur;
   RSHashTable* _next;
@@ -235,10 +238,12 @@
 
   SparsePRT* _next_expanded;
 
   SparsePRT* next_expanded() { return _next_expanded; }
   void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
+
+  bool should_be_on_expanded_list();
 
   static SparsePRT* _head_expanded_list;
 
 public:
   SparsePRT(HeapRegion* hr);
@@ -282,22 +287,64 @@
   void init_iterator(SparsePRTIter* sprt_iter);
 
   static void add_to_expanded_list(SparsePRT* sprt);
   static SparsePRT* get_from_expanded_list();
 
+  // The purpose of these three methods is to help the GC workers
+  // during the cleanup pause to recreate the expanded list, purging
+  // any tables from it that belong to regions that are freed during
+  // cleanup (if we don't purge those tables, there is a race that
+  // causes various crashes; see CR 7014261).
+  //
+  // We chose to recreate the expanded list, instead of purging
+  // entries from it by iterating over it, to avoid this serial phase
+  // at the end of the cleanup pause.
+  //
+  // The three methods below work as follows:
+  // * reset_for_cleanup_tasks() : Nulls the expanded list head at the
+  //   start of the cleanup pause.
+  // * do_cleanup_work() : Called by the cleanup workers for every
+  //   region that is not free / is being freed by the cleanup
+  //   pause. It creates a list of expanded tables whose head / tail
+  //   are on the thread-local SparsePRTCleanupTask object.
+  // * finish_cleanup_task() : Called by the cleanup workers after
+  //   they complete their cleanup task. It adds the local list into
+  //   the global expanded list. It assumes that the
+  //   ParGCRareEvent_lock is being held to ensure MT-safety.
+  static void reset_for_cleanup_tasks();
+  void do_cleanup_work(SparsePRTCleanupTask* sprt_cleanup_task);
+  static void finish_cleanup_task(SparsePRTCleanupTask* sprt_cleanup_task);
+
   bool contains_card(RegionIdx_t region_id, CardIdx_t card_index) const {
     return _next->contains_card(region_id, card_index);
   }
 };
 
-
 class SparsePRTIter: public RSHashTableIter {
 public:
   void init(const SparsePRT* sprt) {
     RSHashTableIter::init(sprt->cur());
   }
   bool has_next(size_t& card_index) {
     return RSHashTableIter::has_next(card_index);
   }
 };
 
+// This allows each worker during a cleanup pause to create a
+// thread-local list of sparse tables that have been expanded and need
+// to be processed at the beginning of the next GC pause. This lists
+// are concatenated into the single expanded list at the end of the
+// cleanup pause.
+class SparsePRTCleanupTask VALUE_OBJ_CLASS_SPEC {
+private:
+  SparsePRT* _head;
+  SparsePRT* _tail;
+
+public:
+  SparsePRTCleanupTask() : _head(NULL), _tail(NULL) { }
+
+  void add(SparsePRT* sprt);
+  SparsePRT* head() { return _head; }
+  SparsePRT* tail() { return _tail; }
+};
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SPARSEPRT_HPP
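
The comment block and the SparsePRTCleanupTask class added above describe a three-step protocol: reset_for_cleanup_tasks() drops the global expanded list head at the start of the cleanup pause, each worker calls do_cleanup_work() per surviving region to build a worker-local list on its SparsePRTCleanupTask, and finish_cleanup_task() concatenates that local list into the global one while ParGCRareEvent_lock is held. The self-contained model below is an assumption-laden sketch of that protocol, not the HotSpot implementation; all types (Region, SparsePRTModel, SparsePRTCleanupTaskModel) and the std::mutex standing in for ParGCRareEvent_lock are invented for illustration.

#include <cstdio>
#include <mutex>
#include <thread>

// Invented stand-in for a heap region; only what the example needs.
struct Region {
  int  id;
  bool freed_by_cleanup;   // tables of such regions must not stay on the list
};

class SparsePRTCleanupTaskModel;

// Invented stand-in for SparsePRT; mirrors only the expanded-list fields and
// the three methods declared in the header above.
class SparsePRTModel {
  friend class SparsePRTCleanupTaskModel;

  Region*         _hr;
  bool            _expanded;
  SparsePRTModel* _next_expanded;

  static SparsePRTModel* _head_expanded_list;
  static std::mutex      _expanded_list_lock;  // plays the ParGCRareEvent_lock role

public:
  explicit SparsePRTModel(Region* hr)
      : _hr(hr), _expanded(true), _next_expanded(nullptr) { }

  bool should_be_on_expanded_list() const {
    return _expanded && !_hr->freed_by_cleanup;
  }

  // Nulls the global expanded list head at the start of the cleanup pause.
  static void reset_for_cleanup_tasks() { _head_expanded_list = nullptr; }

  // Re-link this table onto the worker-local list if it still belongs there.
  void do_cleanup_work(SparsePRTCleanupTaskModel* task);

  // Splice the worker-local list into the global one, under the lock.
  static void finish_cleanup_task(SparsePRTCleanupTaskModel* task);

  static SparsePRTModel* head_expanded_list() { return _head_expanded_list; }
  SparsePRTModel* next_expanded() const { return _next_expanded; }
  Region* hr() const { return _hr; }
};

// Invented stand-in for SparsePRTCleanupTask: a worker-private list with head
// and tail pointers, built without any locking.
class SparsePRTCleanupTaskModel {
  SparsePRTModel* _head;
  SparsePRTModel* _tail;
public:
  SparsePRTCleanupTaskModel() : _head(nullptr), _tail(nullptr) { }
  void add(SparsePRTModel* sprt) {
    sprt->_next_expanded = nullptr;
    if (_tail != nullptr) { _tail->_next_expanded = sprt; } else { _head = sprt; }
    _tail = sprt;
  }
  SparsePRTModel* head() const { return _head; }
  SparsePRTModel* tail() const { return _tail; }
};

SparsePRTModel* SparsePRTModel::_head_expanded_list = nullptr;
std::mutex      SparsePRTModel::_expanded_list_lock;

void SparsePRTModel::do_cleanup_work(SparsePRTCleanupTaskModel* task) {
  if (should_be_on_expanded_list()) task->add(this);
}

void SparsePRTModel::finish_cleanup_task(SparsePRTCleanupTaskModel* task) {
  std::lock_guard<std::mutex> guard(_expanded_list_lock);
  if (task->head() == nullptr) return;
  task->tail()->_next_expanded = _head_expanded_list;  // hook local list in front
  _head_expanded_list = task->head();
}

int main() {
  Region regions[4] = { {0, false}, {1, true}, {2, false}, {3, false} };
  SparsePRTModel tables[4] = {
    SparsePRTModel(&regions[0]), SparsePRTModel(&regions[1]),
    SparsePRTModel(&regions[2]), SparsePRTModel(&regions[3]) };

  SparsePRTModel::reset_for_cleanup_tasks();      // start of the cleanup pause
  auto worker = [&tables](int lo, int hi) {
    SparsePRTCleanupTaskModel local;              // worker-private list
    for (int i = lo; i < hi; i++) tables[i].do_cleanup_work(&local);
    SparsePRTModel::finish_cleanup_task(&local);  // concatenate under the lock
  };
  std::thread t1(worker, 0, 2);
  std::thread t2(worker, 2, 4);
  t1.join(); t2.join();

  for (SparsePRTModel* p = SparsePRTModel::head_expanded_list();
       p != nullptr; p = p->next_expanded()) {
    std::printf("table for region %d stays on the expanded list\n", p->hr()->id);
  }
  return 0;
}

Only the final splice needs mutual exclusion; building each local list is safe without a lock because a SparsePRTCleanupTask is private to its worker, which matches the MT-safety assumption stated in the header comment.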