# HG changeset patch
# User ysr
# Date 1279741542 25200
# Node ID 4f1fffe08c63230f6f2e080f530736b32c707647
# Parent  131ed9a23d484ec8577586337af810969098c453
# Parent  5cbac8938c4c56eb1e87cbe38e377850301b9f4a
Merge

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -664,19 +664,14 @@
       return;
     }
 
-    // XXX use a global constant instead of 64!
-    typedef struct OopTaskQueuePadded {
-      OopTaskQueue work_queue;
-      char pad[64 - sizeof(OopTaskQueue)];  // prevent false sharing
-    } OopTaskQueuePadded;
-
+    typedef Padded<OopTaskQueue> PaddedOopTaskQueue;
    for (i = 0; i < num_queues; i++) {
-      OopTaskQueuePadded *q_padded = new OopTaskQueuePadded();
-      if (q_padded == NULL) {
+      PaddedOopTaskQueue *q = new PaddedOopTaskQueue();
+      if (q == NULL) {
        warning("work_queue allocation failure.");
        return;
      }
-      _task_queues->register_queue(i, &q_padded->work_queue);
+      _task_queues->register_queue(i, q);
    }
    for (i = 0; i < num_queues; i++) {
      _task_queues->queue(i)->initialize();
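
// ---------------------------------------------------------------------------
// A standalone sketch of the false-sharing fix in the hunk above: the
// hand-rolled OopTaskQueuePadded struct is replaced with the Padded<T>
// template this patch adds to globalDefinitions.hpp. The names below
// (PaddedType, Counter, CACHE_LINE_BYTES, ALIGN_UP) are illustrative
// stand-ins, not HotSpot's; the padding arithmetic mirrors PADDING_SIZE.

#include <cstddef>

static const size_t CACHE_LINE_BYTES = 64; // assumed line size, as in the patch

// Round size up to a multiple of a power-of-two alignment.
#define ALIGN_UP(size, alignment) \
  (((size) + ((alignment) - 1)) & ~((alignment) - 1))

// Pad a leaf class by inheriting from it and appending a tail buffer:
// one alignment's worth to absorb a misaligned start address, plus
// sizeof(T) rounded up so back-to-back instances never share a line.
template <class T, size_t alignment = CACHE_LINE_BYTES>
class PaddedType : public T {
private:
  char _pad[alignment + ALIGN_UP(sizeof(T), alignment)];
};

struct Counter { long value; };

int main() {
  // Each element now occupies its own cache line(s); a writer on
  // counters[0] no longer invalidates the line holding counters[1].
  PaddedType<Counter>* counters = new PaddedType<Counter>[4];
  for (int i = 0; i < 4; ++i) counters[i].value = 0;
  delete[] counters;
  return 0;
}
// ---------------------------------------------------------------------------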
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -271,21 +271,16 @@
   if (cas_res == prev_epoch_entry) {
     // We successfully updated the card num value in the epoch entry
     count_ptr->_count = 0; // initialize counter for new card num
+    jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
 
     // Even though the region containing the card at old_card_num was not
     // in the young list when old_card_num was recorded in the epoch
     // cache it could have been added to the free list and subsequently
-    // added to the young list in the intervening time. If the evicted
-    // card is in a young region just return the card_ptr and the evicted
-    // card will not be cleaned. See CR 6817995.
-
-    jbyte* old_card_ptr = card_num_2_ptr(old_card_num);
-    if (is_young_card(old_card_ptr)) {
-      *count = 0;
-      // We can defer the processing of card_ptr
-      *defer = true;
-      return card_ptr;
-    }
+    // added to the young list in the intervening time. See CR 6817995.
+    // We do not deal with this case here - it will be handled in
+    // HeapRegion::oops_on_card_seq_iterate_careful after it has been
+    // determined that the region containing the card has been allocated
+    // to, and it's safe to check the young type of the region.
 
     // We do not want to defer processing of card_ptr in this case
     // (we need to refine old_card_ptr and card_ptr)
@@ -301,22 +296,22 @@
   jbyte* cached_ptr = add_card_count(card_ptr, &count, defer);
   assert(cached_ptr != NULL, "bad cached card ptr");
 
-  if (is_young_card(cached_ptr)) {
-    // The region containing cached_ptr has been freed during a clean up
-    // pause, reallocated, and tagged as young.
-    assert(cached_ptr != card_ptr, "shouldn't be");
+  // We've just inserted a card pointer into the card count cache
+  // and got back either the card that we just inserted or the (evicted)
+  // previous contents of that count slot.
 
-    // We've just inserted a new old-gen card pointer into the card count
-    // cache and evicted the previous contents of that count slot.
-    // The evicted card pointer has been determined to be in a young region
-    // and so cannot be the newly inserted card pointer (that will be
-    // in an old region).
-    // The count for newly inserted card will be set to zero during the
-    // insertion, so we don't want to defer the cleaning of the newly
-    // inserted card pointer.
-    assert(*defer == false, "deferring non-hot card");
-    return NULL;
-  }
+  // The card we got back could be in a young region. When the
+  // returned card (if evicted) was originally inserted, we had
+  // determined that its containing region was not young. However
+  // it is possible for the region to be freed during a cleanup
+  // pause, then reallocated and tagged as young, which will result
+  // in the returned card residing in a young region.
+  //
+  // We do not deal with this case here - the change from non-young
+  // to young could be observed at any time - it will be handled in
+  // HeapRegion::oops_on_card_seq_iterate_careful after it has been
+  // determined that the region containing the card has been allocated
+  // to.
 
   // The card pointer we obtained from the card count cache is not hot
   // so do not store it in the cache; return it for immediate
@@ -325,7 +320,7 @@
     return cached_ptr;
   }
 
-  // Otherwise, the pointer we got from the _card_counts is hot.
+  // Otherwise, the pointer we got from the _card_counts cache is hot.
   jbyte* res = NULL;
   MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
   if (_n_hot == _hot_cache_size) {
@@ -338,17 +333,8 @@
   if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
   _n_hot++;
 
-  if (res != NULL) {
-    // Even though the region containing res was not in the young list
-    // when it was recorded in the hot cache it could have been added
-    // to the free list and subsequently added to the young list in
-    // the intervening time. If res is in a young region, return NULL
-    // so that res is not cleaned. See CR 6817995.
-
-    if (is_young_card(res)) {
-      res = NULL;
-    }
-  }
+  // The card obtained from the hot card cache could be in a young
+  // region. See above on how this can happen.
 
   return res;
 }

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -638,6 +638,11 @@
 
     // Now retry the allocation.
     if (_cur_alloc_region != NULL) {
+      if (allocated_young_region != NULL) {
+        // We need to ensure that the store to top does not
+        // float above the setting of the young type.
+        OrderAccess::storestore();
+      }
      res = _cur_alloc_region->allocate(word_size);
    }
  }
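
// ---------------------------------------------------------------------------
// The OrderAccess::storestore() added above keeps the store that publishes
// newly allocated space (the region's top) from floating above the store
// that tags the region as young. A portable sketch of that publication
// pattern using C++11 atomics; Region here is a toy type, not HeapRegion,
// and release/acquire stands in for HotSpot's barrier pairing.

#include <atomic>

struct Region {
  bool young;             // written first by the allocating thread...
  std::atomic<int> top;   // ...then published with release ordering
  Region() : young(false), top(0) {}
};

void tag_and_publish(Region& r, int new_top) {
  r.young = true;                                   // set the young type
  r.top.store(new_top, std::memory_order_release);  // implies storestore
}

bool refine_sees_consistent_state(const Region& r) {
  // A concurrent refinement thread that observes the published top with
  // acquire ordering is guaranteed to also observe young == true.
  int t = r.top.load(std::memory_order_acquire);
  return t == 0 || r.young;
}
// ---------------------------------------------------------------------------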
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/g1/g1RemSet.cpp
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -676,9 +676,27 @@
   // We must complete this write before we do any of the reads below.
   OrderAccess::storeload();
   // And process it, being careful of unallocated portions of TLAB's.
+
+  // The region for the current card may be a young region. The
+  // current card may have been a card that was evicted from the
+  // card cache. When the card was inserted into the cache, we had
+  // determined that its region was non-young. While in the cache,
+  // the region may have been freed during a cleanup pause, reallocated
+  // and tagged as young.
+  //
+  // We wish to filter out cards for such a region but the current
+  // thread, if we're running concurrently, may "see" the young type
+  // change at any time (so an earlier "is_young" check may pass or
+  // fail arbitrarily). We tell the iteration code to perform this
+  // filtering when it has been determined that there has been an actual
+  // allocation in this region, making it safe to check the young type.
+  bool filter_young = true;
+
   HeapWord* stop_point =
     r->oops_on_card_seq_iterate_careful(dirtyRegion,
-                                        &filter_then_update_rs_oop_cl);
+                                        &filter_then_update_rs_oop_cl,
+                                        filter_young);
+
   // If stop_point is non-null, then we encountered an unallocated region
   // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
   // card and re-enqueue: if we put off the card until a GC pause, then the
@@ -789,8 +807,14 @@
   if (r == NULL) {
     assert(_g1->is_in_permanent(start), "Or else where?");
   } else {
-    guarantee(!r->is_young(), "It was evicted in the current minor cycle.");
-    // Process card pointer we get back from the hot card cache
+    // Checking whether the region we got back from the cache
+    // is young here is inappropriate. The region could have been
+    // freed, reallocated and tagged as young while in the cache.
+    // Hence we could see its young type change at any time.
+    //
+    // Process card pointer we get back from the hot card cache. This
+    // will check whether the region containing the card is young
+    // _after_ checking that the region has been allocated from.
     concurrentRefineOneCard_impl(res, worker_i);
   }
 }
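
// ---------------------------------------------------------------------------
// A condensed model of the ordering the comments above rely on: the young
// check is only trustworthy once the card's region is known to have been
// allocated into, because the allocation path (g1CollectedHeap.cpp hunk)
// sets the young tag before publishing top. The types and helpers below
// are toy stand-ins, not HotSpot's HeapRegion API.

#include <cstddef>

struct ToyRegion {
  volatile bool young;   // may flip non-young -> young at any time
  size_t used;           // bytes allocated; published after 'young' is set
};

// Returns the card if it must be re-enqueued, NULL when it is fully handled.
const char* iterate_card_careful(ToyRegion& r, const char* card,
                                 bool filter_young) {
  if (r.used == 0) {
    return card;         // nothing allocated yet: punt and re-enqueue
  }
  if (filter_young && r.young) {
    return NULL;         // safe to trust 'young' now; skip refinement
  }
  // ... scan the objects on the card here ...
  return NULL;
}
// ---------------------------------------------------------------------------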
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/g1/heapRegion.cpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -658,7 +658,8 @@
 HeapWord*
 HeapRegion::
 oops_on_card_seq_iterate_careful(MemRegion mr,
-                                 FilterOutOfRegionClosure* cl) {
+                                 FilterOutOfRegionClosure* cl,
+                                 bool filter_young) {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
   // If we're within a stop-world GC, then we might look at a card in a
@@ -672,6 +673,16 @@
   if (mr.is_empty()) return NULL;
   // Otherwise, find the obj that extends onto mr.start().
 
+  // The intersection of the incoming mr (for the card) and the
+  // allocated part of the region is non-empty. This implies that
+  // we have actually allocated into this region. The code in
+  // G1CollectedHeap.cpp that allocates a new region sets the
+  // is_young tag on the region before allocating. Thus we can
+  // safely check whether this region is young.
+  if (is_young() && filter_young) {
+    return NULL;
+  }
+
   // We used to use "block_start_careful" here. But we're actually happy
   // to update the BOT while we do this...
   HeapWord* cur = block_start(mr.start());

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/g1/heapRegion.hpp
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Wed Jul 21 12:45:42 2010 -0700
@@ -252,7 +252,7 @@
     // survivor
   };
 
-  YoungType _young_type;
+  volatile YoungType _young_type;
   int _young_index_in_cset;
   SurvRateGroup* _surv_rate_group;
   int _age_index;
@@ -726,9 +726,12 @@
   HeapWord* object_iterate_mem_careful(MemRegion mr,
                                        ObjectClosure* cl);
 
+  // In this version, if filter_young is true and the region
+  // is a young region, then we skip the iteration.
   HeapWord*
   oops_on_card_seq_iterate_careful(MemRegion mr,
-                                   FilterOutOfRegionClosure* cl);
+                                   FilterOutOfRegionClosure* cl,
+                                   bool filter_young);
 
   // The region "mr" is entirely in "this", and starts and ends at block
   // boundaries. The caller declares that all the contained blocks are
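
// ---------------------------------------------------------------------------
// Why _young_type becomes volatile above: a concurrent refinement thread may
// re-read the field while a mutator retags the region, and without the
// qualifier the compiler could legally cache one read across a polling loop.
// A toy sketch of the difference; HotSpot additionally orders the writes
// with OrderAccess barriers (see the g1CollectedHeap.cpp hunk).

struct ToyFlags {
  volatile int young_type;   // 0 = not young, non-zero = young
};

// Each call performs a fresh load of young_type; with a plain int the
// compiler could hoist the load out of the caller's loop entirely.
inline bool currently_young(const ToyFlags& f) {
  return f.young_type != 0;
}
// ---------------------------------------------------------------------------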
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -539,10 +539,9 @@
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 
   for (uint i1 = 0; i1 < ParallelGCThreads; i1++) {
-    ObjToScanQueuePadded *q_padded = new ObjToScanQueuePadded();
-    guarantee(q_padded != NULL, "work_queue Allocation failure.");
-
-    _task_queues->register_queue(i1, &q_padded->work_queue);
+    ObjToScanQueue *q = new ObjToScanQueue();
+    guarantee(q != NULL, "work_queue Allocation failure.");
+    _task_queues->register_queue(i1, q);
   }
 
   for (uint i2 = 0; i2 < ParallelGCThreads; i2++)

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Wed Jul 21 12:45:42 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,8 +33,8 @@
 // but they must be here to allow ParScanClosure::do_oop_work to be defined
 // in genOopClosures.inline.hpp.
 
-typedef OopTaskQueue    ObjToScanQueue;
-typedef OopTaskQueueSet ObjToScanQueueSet;
+typedef Padded<OopTaskQueue> ObjToScanQueue;
+typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
 
 // Enable this to get push/pop/steal stats.
 const int PAR_STATS_ENABLED = 0;
@@ -304,12 +304,6 @@
   friend class ParEvacuateFollowersClosure;
 
 private:
-  // XXX use a global constant instead of 64!
-  struct ObjToScanQueuePadded {
-    ObjToScanQueue work_queue;
-    char pad[64 - sizeof(ObjToScanQueue)];  // prevent false sharing
-  };
-
   // The per-worker-thread work queues
   ObjToScanQueueSet* _task_queues;

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/parNew/parOopClosures.hpp
--- a/src/share/vm/gc_implementation/parNew/parOopClosures.hpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/parNew/parOopClosures.hpp Wed Jul 21 12:45:42 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,7 +26,8 @@
 class ParScanThreadState;
 class ParNewGeneration;
-typedef OopTaskQueueSet ObjToScanQueueSet;
+typedef Padded<OopTaskQueue> ObjToScanQueue;
+typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
 class ParallelTaskTerminator;
 
 class ParScanClosure: public OopsInGenClosure {
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -90,10 +90,7 @@
 }
 
 void PSPromotionManager::post_scavenge() {
-#if PS_PM_STATS
-  print_stats();
-#endif // PS_PM_STATS
-
+  TASKQUEUE_STATS_ONLY(if (PrintGCDetails && ParallelGCVerbose) print_stats());
   for (uint i = 0; i < ParallelGCThreads + 1; i++) {
     PSPromotionManager* manager = manager_array(i);
     if (UseDepthFirstScavengeOrder) {
@@ -105,37 +102,58 @@
   }
 }
 
-#if PS_PM_STATS
-
+#if TASKQUEUE_STATS
 void
-PSPromotionManager::print_stats(uint i) {
-  tty->print_cr("---- GC Worker %2d Stats", i);
-  tty->print_cr("    total pushes            %8d", _total_pushes);
-  tty->print_cr("    masked pushes           %8d", _masked_pushes);
-  tty->print_cr("    overflow pushes         %8d", _overflow_pushes);
-  tty->print_cr("    max overflow length     %8d", _max_overflow_length);
-  tty->print_cr("");
-  tty->print_cr("    arrays chunked          %8d", _arrays_chunked);
-  tty->print_cr("    array chunks processed  %8d", _array_chunks_processed);
-  tty->print_cr("");
-  tty->print_cr("    total steals            %8d", _total_steals);
-  tty->print_cr("    masked steals           %8d", _masked_steals);
-  tty->print_cr("");
+PSPromotionManager::print_taskqueue_stats(uint i) const {
+  const TaskQueueStats& stats = depth_first() ?
+    _claimed_stack_depth.stats : _claimed_stack_breadth.stats;
+  tty->print("%3u ", i);
+  stats.print();
+  tty->cr();
 }
 
 void
+PSPromotionManager::print_local_stats(uint i) const {
+  #define FMT " " SIZE_FORMAT_W(10)
+  tty->print_cr("%3u" FMT FMT FMT FMT, i, _masked_pushes, _masked_steals,
+                _arrays_chunked, _array_chunks_processed);
+  #undef FMT
+}
+
+static const char* const pm_stats_hdr[] = {
+  "    --------masked-------     arrays      array",
+  "thr       push      steal    chunked     chunks",
+  "--- ---------- ---------- ---------- ----------"
+};
+
+void
 PSPromotionManager::print_stats() {
-  tty->print_cr("== GC Tasks Stats (%s), GC %3d",
-                (UseDepthFirstScavengeOrder) ? "Depth-First" : "Breadth-First",
+  const bool df = UseDepthFirstScavengeOrder;
+  tty->print_cr("== GC Task Stats (%s-First), GC %3d", df ? "Depth" : "Breadth",
                 Universe::heap()->total_collections());
 
-  for (uint i = 0; i < ParallelGCThreads+1; ++i) {
-    PSPromotionManager* manager = manager_array(i);
-    manager->print_stats(i);
+  tty->print("thr "); TaskQueueStats::print_header(1); tty->cr();
+  tty->print("--- "); TaskQueueStats::print_header(2); tty->cr();
+  for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
+    manager_array(i)->print_taskqueue_stats(i);
+  }
+
+  const uint hlines = sizeof(pm_stats_hdr) / sizeof(pm_stats_hdr[0]);
+  for (uint i = 0; i < hlines; ++i) tty->print_cr(pm_stats_hdr[i]);
+  for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
+    manager_array(i)->print_local_stats(i);
   }
 }
 
-#endif // PS_PM_STATS
+void
+PSPromotionManager::reset_stats() {
+  TaskQueueStats& stats = depth_first() ?
+    claimed_stack_depth()->stats : claimed_stack_breadth()->stats;
+  stats.reset();
+  _masked_pushes = _masked_steals = 0;
+  _arrays_chunked = _array_chunks_processed = 0;
+}
+#endif // TASKQUEUE_STATS
 
 PSPromotionManager::PSPromotionManager() {
   ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
@@ -189,16 +207,7 @@
 
   _prefetch_queue.clear();
 
-#if PS_PM_STATS
-  _total_pushes = 0;
-  _masked_pushes = 0;
-  _overflow_pushes = 0;
-  _max_overflow_length = 0;
-  _arrays_chunked = 0;
-  _array_chunks_processed = 0;
-  _total_steals = 0;
-  _masked_steals = 0;
-#endif // PS_PM_STATS
+  TASKQUEUE_STATS_ONLY(reset_stats());
 }
 
@@ -423,14 +432,9 @@
       new_obj->is_objArray() &&
       PSChunkLargeArrays) {
     // we'll chunk it
-#if PS_PM_STATS
-    ++_arrays_chunked;
-#endif // PS_PM_STATS
     oop* const masked_o = mask_chunked_array_oop(o);
     push_depth(masked_o);
-#if PS_PM_STATS
-    ++_masked_pushes;
-#endif // PS_PM_STATS
+    TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
   } else {
     // we'll just push its contents
     new_obj->push_contents(this);
@@ -494,9 +498,7 @@
   assert(old->is_objArray(), "invariant");
   assert(old->is_forwarded(), "invariant");
 
-#if PS_PM_STATS
-  ++_array_chunks_processed;
-#endif // PS_PM_STATS
+  TASKQUEUE_STATS_ONLY(++_array_chunks_processed);
 
   oop const obj = old->forwardee();
 
@@ -508,9 +510,7 @@
     assert(start > 0, "invariant");
     arrayOop(old)->set_length(start);
     push_depth(mask_chunked_array_oop(old));
-#if PS_PM_STATS
-    ++_masked_pushes;
-#endif // PS_PM_STATS
+    TASKQUEUE_STATS_ONLY(++_masked_pushes);
   } else {
     // this is the final chunk for this array
     start = 0;
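
// ---------------------------------------------------------------------------
// The TASKQUEUE_STATS_ONLY(...) wrapper used above compiles its argument to
// nothing when stats are disabled, so the hot paths carry no cost in product
// builds. A self-contained sketch of that conditional-compilation pattern;
// STATS_ONLY, EXAMPLE_STATS and the counter are illustrative names, not the
// HotSpot macros themselves.

#include <cstdio>

#ifndef EXAMPLE_STATS
#define EXAMPLE_STATS 1            // flip to 0 for a "product" build
#endif

#if EXAMPLE_STATS
#define STATS_ONLY(code) code
#else
#define STATS_ONLY(code)
#endif

static unsigned long g_pushes = 0;

void push_work() {
  // ... the real push goes here ...
  STATS_ONLY(++g_pushes;)          // vanishes when EXAMPLE_STATS == 0
}

int main() {
  for (int i = 0; i < 3; ++i) push_work();
  STATS_ONLY(printf("pushes: %lu\n", g_pushes);)
  return 0;
}
// ---------------------------------------------------------------------------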
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.hpp Wed Jul 21 12:45:42 2010 -0700
@@ -42,8 +42,6 @@
 class PSOldGen;
 class ParCompactionManager;
 
-#define PS_PM_STATS 0
-
 class PSPromotionManager : public CHeapObj {
   friend class PSScavenge;
   friend class PSRefProcTaskExecutor;
@@ -54,22 +52,18 @@
   static PSOldGen*     _old_gen;
   static MutableSpace* _young_space;
 
-#if PS_PM_STATS
-  uint _total_pushes;
-  uint _masked_pushes;
-
-  uint _overflow_pushes;
-  uint _max_overflow_length;
+#if TASKQUEUE_STATS
+  size_t _masked_pushes;
+  size_t _masked_steals;
+  size_t _arrays_chunked;
+  size_t _array_chunks_processed;
 
-  uint _arrays_chunked;
-  uint _array_chunks_processed;
+  void print_taskqueue_stats(uint i) const;
+  void print_local_stats(uint i) const;
+  static void print_stats();
 
-  uint _total_steals;
-  uint _masked_steals;
-
-  void print_stats(uint i);
-  static void print_stats();
-#endif // PS_PM_STATS
+  void reset_stats();
+#endif // TASKQUEUE_STATS
 
   PSYoungPromotionLAB _young_lab;
   PSOldPromotionLAB   _old_lab;
@@ -143,42 +137,12 @@
 
   template <class T> void push_depth(T* p) {
     assert(depth_first(), "pre-condition");
-
-#if PS_PM_STATS
-    ++_total_pushes;
-    int stack_length = claimed_stack_depth()->overflow_stack()->length();
-#endif // PS_PM_STATS
-
     claimed_stack_depth()->push(p);
-
-#if PS_PM_STATS
-    if (claimed_stack_depth()->overflow_stack()->length() != stack_length) {
-      ++_overflow_pushes;
-      if ((uint)stack_length + 1 > _max_overflow_length) {
-        _max_overflow_length = (uint)stack_length + 1;
-      }
-    }
-#endif // PS_PM_STATS
   }
 
   void push_breadth(oop o) {
     assert(!depth_first(), "pre-condition");
-
-#if PS_PM_STATS
-    ++_total_pushes;
-    int stack_length = claimed_stack_breadth()->overflow_stack()->length();
-#endif // PS_PM_STATS
-
     claimed_stack_breadth()->push(o);
-
-#if PS_PM_STATS
-    if (claimed_stack_breadth()->overflow_stack()->length() != stack_length) {
-      ++_overflow_pushes;
-      if ((uint)stack_length + 1 > _max_overflow_length) {
-        _max_overflow_length = (uint)stack_length + 1;
-      }
-    }
-#endif // PS_PM_STATS
   }
 
 protected:
@@ -256,12 +220,5 @@
   template <class T> inline void claim_or_forward_depth(T* p);
   template <class T> inline void claim_or_forward_breadth(T* p);
 
-#if PS_PM_STATS
-  void increment_steals(oop* p = NULL) {
-    _total_steals += 1;
-    if (p != NULL && is_oop_masked(p)) {
-      _masked_steals += 1;
-    }
-  }
-#endif // PS_PM_STATS
+  TASKQUEUE_STATS_ONLY(inline void record_steal(StarTask& p);)
 };
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp Wed Jul 21 12:45:42 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -124,3 +124,11 @@
     }
   }
 }
+
+#if TASKQUEUE_STATS
+void PSPromotionManager::record_steal(StarTask& p) {
+  if (is_oop_masked(p)) {
+    ++_masked_steals;
+  }
+}
+#endif // TASKQUEUE_STATS

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -148,9 +148,7 @@
   while(true) {
     StarTask p;
     if (PSPromotionManager::steal_depth(which, &random_seed, p)) {
-#if PS_PM_STATS
-      pm->increment_steals(p);
-#endif // PS_PM_STATS
+      TASKQUEUE_STATS_ONLY(pm->record_steal(p));
       pm->process_popped_location_depth(p);
       pm->drain_stacks_depth(true);
     } else {
@@ -163,9 +161,6 @@
   while(true) {
     oop obj;
     if (PSPromotionManager::steal_breadth(which, &random_seed, obj)) {
-#if PS_PM_STATS
-      pm->increment_steals();
-#endif // PS_PM_STATS
       obj->copy_contents(pm);
       pm->drain_stacks_breadth(true);
     } else {

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/utilities/globalDefinitions.hpp
--- a/src/share/vm/utilities/globalDefinitions.hpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/utilities/globalDefinitions.hpp Wed Jul 21 12:45:42 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -345,6 +345,35 @@
   return align_size_up(offset, HeapWordsPerLong);
 }
 
+// The expected size in bytes of a cache line, used to pad data structures.
+#define DEFAULT_CACHE_LINE_SIZE 64
+
+// Bytes needed to pad type to avoid cache-line sharing; alignment should be
+// the expected cache line size (a power of two).  The first addend avoids
+// sharing when the start address is not a multiple of alignment; the second
+// maintains alignment of starting addresses that happen to be a multiple.
+#define PADDING_SIZE(type, alignment) \
+  ((alignment) + align_size_up_(sizeof(type), alignment))
+
+// Templates to create a subclass padded to avoid cache line sharing.  These
+// are effective only when applied to derived-most (leaf) classes.
+
+// When no args are passed to the base ctor.
+template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class Padded: public T {
+private:
+  char _pad_buf_[PADDING_SIZE(T, alignment)];
+};
+
+// When either 0 or 1 args may be passed to the base ctor.
+template <class T, typename Arg1T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
+class Padded01: public T {
+public:
+  Padded01(): T() { }
+  Padded01(Arg1T arg1): T(arg1) { }
+private:
+  char _pad_buf_[PADDING_SIZE(T, alignment)];
+};
 
 //----------------------------------------------------------------------------------------------------
 // Utility macros for compilers
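
// ---------------------------------------------------------------------------
// A compile-time check of the PADDING_SIZE arithmetic above, assuming
// HotSpot's align_size_up_(size, alignment) rounds size up to a multiple of
// a power-of-two alignment. Queue40 is a made-up 40-byte type used only for
// the illustration.

#include <cstddef>

#define align_size_up_(size, alignment) \
  (((size) + ((alignment) - 1)) & ~((alignment) - 1))
#define DEFAULT_CACHE_LINE_SIZE 64
#define PADDING_SIZE(type, alignment) \
  ((alignment) + align_size_up_(sizeof(type), alignment))

struct Queue40 { char body[40]; };

// 64 bytes absorb a start address that is not line-aligned, and
// align_size_up_(40, 64) == 64 keeps the next object off this line,
// so PADDING_SIZE == 128. The negative-array-size trick fails to
// compile if the arithmetic ever changes.
typedef char padding_size_check[
    PADDING_SIZE(Queue40, DEFAULT_CACHE_LINE_SIZE) == 128 ? 1 : -1];

int main() { return 0; }
// ---------------------------------------------------------------------------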
diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/utilities/taskqueue.cpp
--- a/src/share/vm/utilities/taskqueue.cpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/utilities/taskqueue.cpp Wed Jul 21 12:45:42 2010 -0700
@@ -31,6 +31,48 @@
 uint ParallelTaskTerminator::_total_peeks = 0;
 #endif
 
+#if TASKQUEUE_STATS
+const char * const TaskQueueStats::_names[last_stat_id] = {
+  "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax"
+};
+
+void TaskQueueStats::print_header(unsigned int line, outputStream* const stream,
+                                  unsigned int width)
+{
+  // Use a width w: 1 <= w <= max_width
+  const unsigned int max_width = 40;
+  const unsigned int w = MAX2(MIN2(width, max_width), 1U);
+
+  if (line == 0) { // spaces equal in width to the header
+    const unsigned int hdr_width = w * last_stat_id + last_stat_id - 1;
+    stream->print("%*s", hdr_width, " ");
+  } else if (line == 1) { // labels
+    stream->print("%*s", w, _names[0]);
+    for (unsigned int i = 1; i < last_stat_id; ++i) {
+      stream->print(" %*s", w, _names[i]);
+    }
+  } else if (line == 2) { // dashed lines
+    char dashes[max_width + 1];
+    memset(dashes, '-', w);
+    dashes[w] = '\0';
+    stream->print("%s", dashes);
+    for (unsigned int i = 1; i < last_stat_id; ++i) {
+      stream->print(" %s", dashes);
+    }
+  }
+}
+
+void TaskQueueStats::print(outputStream* stream, unsigned int width) const
+{
+  #define FMT SIZE_FORMAT_W(*)
+  stream->print(FMT, width, _stats[0]);
+  for (unsigned int i = 1; i < last_stat_id; ++i) {
+    stream->print(" " FMT, width, _stats[i]);
+  }
+  #undef FMT
+}
+#endif // TASKQUEUE_STATS
+
 int TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
   const int a =      16807;
   const int m = 2147483647;

diff -r 131ed9a23d48 -r 4f1fffe08c63 src/share/vm/utilities/taskqueue.hpp
--- a/src/share/vm/utilities/taskqueue.hpp Wed Jul 21 09:57:21 2010 -0700
+++ b/src/share/vm/utilities/taskqueue.hpp Wed Jul 21 12:45:42 2010 -0700
@@ -22,6 +22,72 @@
  *
  */
 
+// Simple TaskQueue stats that are collected by default in debug builds.
+
+#if !defined(TASKQUEUE_STATS) && defined(ASSERT)
+#define TASKQUEUE_STATS 1
+#elif !defined(TASKQUEUE_STATS)
+#define TASKQUEUE_STATS 0
+#endif
+
+#if TASKQUEUE_STATS
+#define TASKQUEUE_STATS_ONLY(code) code
+#else
+#define TASKQUEUE_STATS_ONLY(code)
+#endif // TASKQUEUE_STATS
+
+#if TASKQUEUE_STATS
+class TaskQueueStats {
+public:
+  enum StatId {
+    push,             // number of taskqueue pushes
+    pop,              // number of taskqueue pops
+    pop_slow,         // subset of taskqueue pops that were done slow-path
+    steal_attempt,    // number of taskqueue steal attempts
+    steal,            // number of taskqueue steals
+    overflow,         // number of overflow pushes
+    overflow_max_len, // max length of overflow stack
+    last_stat_id
+  };
+
+public:
+  inline TaskQueueStats() { reset(); }
+
+  inline void record_push()     { ++_stats[push]; }
+  inline void record_pop()      { ++_stats[pop]; }
+  inline void record_pop_slow() { record_pop(); ++_stats[pop_slow]; }
+  inline void record_steal(bool success);
+  inline void record_overflow(size_t new_length);
+
+  inline size_t get(StatId id) const { return _stats[id]; }
+  inline const size_t* get() const   { return _stats; }
+
+  inline void reset();
+
+  static void print_header(unsigned int line, outputStream* const stream = tty,
+                           unsigned int width = 10);
+  void print(outputStream* const stream = tty, unsigned int width = 10) const;
+
+private:
+  size_t _stats[last_stat_id];
+  static const char * const _names[last_stat_id];
+};
+
+void TaskQueueStats::record_steal(bool success) {
+  ++_stats[steal_attempt];
+  if (success) ++_stats[steal];
+}
+
+void TaskQueueStats::record_overflow(size_t new_len) {
+  ++_stats[overflow];
+  if (new_len > _stats[overflow_max_len]) _stats[overflow_max_len] = new_len;
+}
+
+void TaskQueueStats::reset() {
+  memset(_stats, 0, sizeof(_stats));
+}
+#endif // TASKQUEUE_STATS
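
// ---------------------------------------------------------------------------
// A condensed, standalone model of how the counters above are meant to be
// used: bump them on the queue's hot paths, then print one fixed-width row
// per worker after a collection. std::printf stands in for outputStream,
// and MiniStats is a toy, not the TaskQueueStats class itself.

#include <cstdio>
#include <cstring>

class MiniStats {
public:
  enum StatId { push, pop, steal, last_stat_id };
  MiniStats() { memset(_stats, 0, sizeof(_stats)); }
  void record(StatId id) { ++_stats[id]; }
  void print() const {   // one row, like TaskQueueStats::print
    printf("%10lu %10lu %10lu\n", _stats[push], _stats[pop], _stats[steal]);
  }
private:
  unsigned long _stats[last_stat_id];
};

int main() {
  MiniStats s;                                             // per-queue instance
  for (int i = 0; i < 5; ++i) s.record(MiniStats::push);   // hot-path hooks
  for (int i = 0; i < 3; ++i) s.record(MiniStats::pop);
  s.record(MiniStats::steal);
  printf("%10s %10s %10s\n", "qpush", "qpop", "qsteal");   // header row
  s.print();
  return 0;
}
// ---------------------------------------------------------------------------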
+
 template <unsigned int N>
 class TaskQueueSuper: public CHeapObj {
 protected:
@@ -135,6 +201,8 @@
 
   // Total size of queue.
   static const uint total_size() { return N; }
+
+  TASKQUEUE_STATS_ONLY(TaskQueueStats stats;)
 };
 
 template<class E, unsigned int N = TASKQUEUE_SIZE>
@@ -152,6 +220,7 @@
 public:
   using TaskQueueSuper<N>::max_elems;
   using TaskQueueSuper<N>::size;
+  TASKQUEUE_STATS_ONLY(using TaskQueueSuper<N>::stats;)
 
 private:
   // Slow paths for push, pop_local.  (pop_global has no fast path.)
@@ -224,14 +293,14 @@
     // g++ complains if the volatile result of the assignment is unused.
     const_cast<E&>(_elems[localBot] = t);
     OrderAccess::release_store(&_bottom, increment_index(localBot));
+    TASKQUEUE_STATS_ONLY(stats.record_push());
     return true;
   }
   return false;
 }
 
 template<class E, unsigned int N>
-bool GenericTaskQueue<E, N>::
-pop_local_slow(uint localBot, Age oldAge) {
+bool GenericTaskQueue<E, N>::pop_local_slow(uint localBot, Age oldAge) {
   // This queue was observed to contain exactly one element; either this
   // thread will claim it, or a competing "pop_global".  In either case,
   // the queue will be logically empty afterwards.  Create a new Age value
@@ -251,6 +320,7 @@
     if (tempAge == oldAge) {
       // We win.
       assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
+      TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
       return true;
     }
   }
@@ -306,6 +376,8 @@
   typedef GrowableArray<E> overflow_t;
   typedef GenericTaskQueue<E, N> taskqueue_t;
 
+  TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)
+
   OverflowTaskQueue();
   ~OverflowTaskQueue();
   void initialize();
@@ -356,6 +428,7 @@
 {
   if (!taskqueue_t::push(t)) {
     overflow_stack()->push(t);
+    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->length()));
   }
   return true;
 }
@@ -424,9 +497,13 @@
 template<class T> bool
 GenericTaskQueueSet<T>::steal(uint queue_num, int* seed, E& t) {
-  for (uint i = 0; i < 2 * _n; i++)
-    if (steal_best_of_2(queue_num, seed, t))
+  for (uint i = 0; i < 2 * _n; i++) {
+    if (steal_best_of_2(queue_num, seed, t)) {
+      TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true));
       return true;
+    }
+  }
+  TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false));
   return false;
 }
 
@@ -574,6 +651,7 @@
     // g++ complains if the volatile result of the assignment is unused.
     const_cast<E&>(_elems[localBot] = t);
     OrderAccess::release_store(&_bottom, increment_index(localBot));
+    TASKQUEUE_STATS_ONLY(stats.record_push());
     return true;
   } else {
     return push_slow(t, dirty_n_elems);
@@ -603,6 +681,7 @@
   idx_t tp = _age.top();    // XXX
   if (size(localBot, tp) > 0) {
     assert(dirty_size(localBot, tp) != N - 1, "sanity");
+    TASKQUEUE_STATS_ONLY(stats.record_pop());
    return true;
  } else {
    // Otherwise, the queue contained exactly one element; we take the slow