Mercurial > hg > graal-jvmci-8

--- a/.hgtags	Wed Mar 25 11:31:54 2015 -0700
+++ b/.hgtags	Wed Apr 01 11:31:42 2015 -0700
@@ -626,3 +626,5 @@
 353e580ce6878d80c7b7cd27f8ad24609b12c58b jdk8u60-b07
 a72a4192a36d6d84766d6135fe6515346c742007 hs25.60-b08
 bf68e15dc8fe73eeb1eb3c656df51fdb1f707a97 jdk8u60-b08
+d937e6a0674841d670232ecf1611f52e1ae998e7 hs25.60-b09
+f1058b5c6294235d8ad032dcc72c8f8bc202cb5a jdk8u60-b09
--- a/make/aix/makefiles/adlc.make	Wed Mar 25 11:31:54 2015 -0700
+++ b/make/aix/makefiles/adlc.make	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -140,13 +140,7 @@
 # Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO.
 ADLCFLAGS += -q -T

-# Normally, debugging is done directly on the ad_<arch>*.cpp files.
-# But -g will put #line directives in those files pointing back to <arch>.ad.
-# Some builds of gcc 3.2 have a bug that gets tickled by the extra #line directives
-# so skip it for 3.2 and ealier.
-ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
 ADLCFLAGS += -g
-endif

 ifdef LP64
 ADLCFLAGS += -D_LP64
--- a/make/aix/makefiles/ppc64.make	Wed Mar 25 11:31:54 2015 -0700
+++ b/make/aix/makefiles/ppc64.make	Wed Apr 01 11:31:42 2015 -0700
@@ -1,6 +1,6 @@
 #
-# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2012, 2013 SAP AG. All rights reserved.
+# Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, 2015 SAP AG. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,9 @@
 #  - 1540-1090 (I) The destructor of "..." might not be called.
 #  - 1500-010: (W) WARNING in ...: Infinite loop.  Program may not stop.
 #    There are several infinite loops in the vm, suppress.
-CFLAGS += -qsuppress=1540-1090 -qsuppress=1500-010
+#  - 1540-1639 (I) The behavior of long type bit fields has changed ...
+#                  ... long type bit fields now default to long, not int.
+CFLAGS += -qsuppress=1540-1090 -qsuppress=1500-010 -qsuppress=1540-1639

 # Suppress
 #  - 540-1088 (W) The exception specification is being ignored.
@@ -69,9 +71,6 @@
 OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
 OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)

-# xlc 10.01 parameters for ipa compile.
-QIPA_COMPILE=$(if $(CXX_IS_V10),-qipa)
-
 # Xlc 10.1 parameters for aggressive optimization:
 # - qhot=level=1: Most aggressive loop optimizations.
 # - qignerrno: Assume errno is not modified by system calls.
@@ -86,7 +85,7 @@
 OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline

 # Set all the xlC V10.1 options here.
-OPT_CFLAGS += $(QIPA_COMPILE) $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
+OPT_CFLAGS += $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)

 export OBJECT_MODE=64
--- a/make/aix/makefiles/xlc.make	Wed Mar 25 11:31:54 2015 -0700
+++ b/make/aix/makefiles/xlc.make	Wed Apr 01 11:31:42 2015 -0700
@@ -1,6 +1,6 @@
 #
-# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012, 2013 SAP. All rights reserved.
+# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, 2015 SAP. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -34,13 +34,17 @@

 AS  = $(CC) -c

-# get xlc version
-CXX_VERSION   := $(shell $(CXX) -qversion 2>&1 | sed -n 's/.*Version: \([0-9.]*\)/\1/p')
+# get xlc version which comes as VV.RR.MMMM.LLLL where 'VV' is the version,
+# 'RR' is the release, 'MMMM' is the modification and 'LLLL' is the level.
+# We only use 'VV.RR.LLLL' to avoid integer overflows in bash when comparing
+# the version numbers (some shells only support 32-bit integer compares!).
+CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | \
+                   sed -n 's/.*Version: \([0-9]\{2\}\).\([0-9]\{2\}\).[0-9]\{4\}.\([0-9]\{4\}\)/\1\2\3/p')

 # xlc 08.00.0000.0023 and higher supports -qtune=balanced
-CXX_SUPPORTS_BALANCED_TUNING=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 080000000023 ] ; then echo "true" ; fi)
+CXX_SUPPORTS_BALANCED_TUNING := $(shell if [ $(CXX_VERSION) -ge 08000023 ] ; then echo "true" ; fi)
 # xlc 10.01 is used with aggressive optimizations to boost performance
-CXX_IS_V10=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 100100000000 ] ; then echo "true" ; fi)
+CXX_IS_V10 := $(shell if [ $(CXX_VERSION) -ge 10010000 ] ; then echo "true" ; fi)

 # check for precompiled headers support

@@ -130,7 +134,7 @@
 # MAPFLAG = -Xlinker --version-script=FILENAME

 # Build shared library
-SHARED_FLAG = -q64 -b64 -bexpall -G -bnoentry -qmkshrobj -brtl -bnolibpath
+SHARED_FLAG = -q64 -b64 -bexpall -G -bnoentry -qmkshrobj -brtl -bnolibpath -bernotok

 #------------------------------------------------------------------------
 # Debug flags
--- a/make/hotspot_version	Wed Mar 25 11:31:54 2015 -0700
+++ b/make/hotspot_version	Wed Apr 01 11:31:42 2015 -0700
@@ -35,7 +35,7 @@

 HS_MAJOR_VER=25
 HS_MINOR_VER=60
-HS_BUILD_NUMBER=08
+HS_BUILD_NUMBER=09

 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/src/share/vm/classfile/verifier.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/classfile/verifier.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -655,6 +655,7 @@


     bool this_uninit = false;  // Set to true when invokespecial <init> initialized 'this'
+    bool verified_exc_handlers = false;

     // Merge with the next instruction
     {
@@ -686,6 +687,18 @@
         }
       }

+      // Look for possible jump target in exception handlers and see if it
+      // matches current_frame.  Do this check here for astore*, dstore*,
+      // fstore*, istore*, and lstore* opcodes because they can change the type
+      // state by adding a local.  JVM Spec says that the incoming type state
+      // should be used for this check.  So, do the check here before a possible
+      // local is added to the type state.
+      if (Bytecodes::is_store_into_local(opcode) && bci >= ex_min && bci < ex_max) {
+        verify_exception_handler_targets(
+          bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this));
+        verified_exc_handlers = true;
+      }
+
       switch (opcode) {
         case Bytecodes::_nop :
           no_control_flow = false; break;
@@ -1662,9 +1675,13 @@
       }  // end switch
     }  // end Merge with the next instruction

-    // Look for possible jump target in exception handlers and see if it
-    // matches current_frame
-    if (bci >= ex_min && bci < ex_max) {
+    // Look for possible jump target in exception handlers and see if it matches
+    // current_frame.  Don't do this check if it has already been done (for
+    // ([a,d,f,i,l]store* opcodes).  This check cannot be done earlier because
+    // opcodes, such as invokespecial, may set the this_uninit flag.
+    assert(!(verified_exc_handlers && this_uninit),
+      "Exception handler targets got verified before this_uninit got set");
+    if (!verified_exc_handlers && bci >= ex_min && bci < ex_max) {
       verify_exception_handler_targets(
         bci, this_uninit, &current_frame, &stackmap_table, CHECK_VERIFY(this));
     }
@@ -2232,14 +2249,20 @@
 }

 // Look at the method's handlers.  If the bci is in the handler's try block
-// then check if the handler_pc is already on the stack.  If not, push it.
+// then check if the handler_pc is already on the stack.  If not, push it
+// unless the handler has already been scanned.
 void ClassVerifier::push_handlers(ExceptionTable* exhandlers,
+                                  GrowableArray<u4>* handler_list,
                                   GrowableArray<u4>* handler_stack,
                                   u4 bci) {
   int exlength = exhandlers->length();
   for(int x = 0; x < exlength; x++) {
     if (bci >= exhandlers->start_pc(x) && bci < exhandlers->end_pc(x)) {
-      handler_stack->append_if_missing(exhandlers->handler_pc(x));
+      u4 exhandler_pc = exhandlers->handler_pc(x);
+      if (!handler_list->contains(exhandler_pc)) {
+        handler_stack->append_if_missing(exhandler_pc);
+        handler_list->append(exhandler_pc);
+      }
     }
   }
 }
@@ -2257,6 +2280,10 @@
   GrowableArray<u4>* bci_stack = new GrowableArray<u4>(30);
   // Create stack for handlers for try blocks containing this handler.
   GrowableArray<u4>* handler_stack = new GrowableArray<u4>(30);
+  // Create list of handlers that have been pushed onto the handler_stack
+  // so that handlers embedded inside of their own TRY blocks only get
+  // scanned once.
+  GrowableArray<u4>* handler_list = new GrowableArray<u4>(30);
   // Create list of visited branch opcodes (goto* and if*).
   GrowableArray<u4>* visited_branches = new GrowableArray<u4>(30);
   ExceptionTable exhandlers(_method());
@@ -2275,7 +2302,7 @@

     // If the bytecode is in a TRY block, push its handlers so they
     // will get parsed.
-    push_handlers(&exhandlers, handler_stack, bci);
+    push_handlers(&exhandlers, handler_list, handler_stack, bci);

     switch (opcode) {
       case Bytecodes::_if_icmpeq:
--- a/src/share/vm/classfile/verifier.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/classfile/verifier.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -305,9 +305,10 @@
     bool* this_uninit, constantPoolHandle cp, StackMapTable* stackmap_table,
     TRAPS);

-  // Used by ends_in_athrow() to push all handlers that contain bci onto
-  // the handler_stack, if the handler is not already on the stack.
+  // Used by ends_in_athrow() to push all handlers that contain bci onto the
+  // handler_stack, if the handler has not already been pushed on the stack.
   void push_handlers(ExceptionTable* exhandlers,
+                     GrowableArray<u4>* handler_list,
                      GrowableArray<u4>* handler_stack,
                      u4 bci);
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -598,7 +598,7 @@
   _collector_policy(cp),
   _should_unload_classes(CMSClassUnloadingEnabled),
   _concurrent_cycles_since_last_unload(0),
-  _roots_scanning_options(SharedHeap::SO_None),
+  _roots_scanning_options(GenCollectedHeap::SO_None),
   _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
   _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) CMSTracer()),
@@ -3068,7 +3068,7 @@
   gch->gen_process_roots(_cmsGen->level(),
                          true,   // younger gens are roots
                          true,   // activate StrongRootsScope
-                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
                          should_unload_classes(),
                          &notOlder,
                          NULL,
@@ -3136,7 +3136,7 @@
   gch->gen_process_roots(_cmsGen->level(),
                          true,   // younger gens are roots
                          true,   // activate StrongRootsScope
-                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
                          should_unload_classes(),
                          &notOlder,
                          NULL,
@@ -3327,7 +3327,7 @@
 void CMSCollector::setup_cms_unloading_and_verification_state() {
   const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
                              || VerifyBeforeExit;
-  const  int  rso           =   SharedHeap::SO_AllCodeCache;
+  const  int  rso           =   GenCollectedHeap::SO_AllCodeCache;

   // We set the proper root for this CMS cycle here.
   if (should_unload_classes()) {   // Should unload classes this cycle
@@ -3753,7 +3753,7 @@
       gch->gen_process_roots(_cmsGen->level(),
                              true,   // younger gens are roots
                              true,   // activate StrongRootsScope
-                             SharedHeap::ScanningOption(roots_scanning_options()),
+                             GenCollectedHeap::ScanningOption(roots_scanning_options()),
                              should_unload_classes(),
                              &notOlder,
                              NULL,
@@ -5254,13 +5254,13 @@
   gch->gen_process_roots(_collector->_cmsGen->level(),
                          false,     // yg was scanned above
                          false,     // this is parallel code
-                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mri_cl,
                          NULL,
                          &cld_closure);
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5390,14 +5390,14 @@
   gch->gen_process_roots(_collector->_cmsGen->level(),
                          false,     // yg was scanned above
                          false,     // this is parallel code
-                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mrias_cl,
                          NULL,
                          NULL);     // The dirty klasses will be handled below

   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5982,14 +5982,14 @@
     gch->gen_process_roots(_cmsGen->level(),
                            true,  // younger gens as roots
                            false, // use the local StrongRootsScope
-                           SharedHeap::ScanningOption(roots_scanning_options()),
+                           GenCollectedHeap::ScanningOption(roots_scanning_options()),
                            should_unload_classes(),
                            &mrias_cl,
                            NULL,
                            NULL); // The dirty klasses will be handled below

     assert(should_unload_classes()
-           || (roots_scanning_options() & SharedHeap::SO_AllCodeCache),
+           || (roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
            "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   }
--- a/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -254,25 +254,23 @@
 HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size,
                                                        bool force) {
   assert(!force, "not supported for GC alloc regions");
-  return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForSurvived);
+  return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young);
 }

 void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region,
                                           size_t allocated_bytes) {
-  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes,
-                               GCAllocForSurvived);
+  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Young);
 }

 HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size,
                                                   bool force) {
   assert(!force, "not supported for GC alloc regions");
-  return _g1h->new_gc_alloc_region(word_size, count(), GCAllocForTenured);
+  return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old);
 }

 void OldGCAllocRegion::retire_region(HeapRegion* alloc_region,
                                      size_t allocated_bytes) {
-  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes,
-                               GCAllocForTenured);
+  _g1h->retire_gc_alloc_region(alloc_region, allocated_bytes, InCSetState::Old);
 }

 HeapRegion* OldGCAllocRegion::release() {
--- a/src/share/vm/gc_implementation/g1/g1Allocator.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1Allocator.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -59,7 +59,7 @@
       !(retained_region->top() == retained_region->end()) &&
       !retained_region->is_empty() &&
       !retained_region->isHumongous()) {
-    retained_region->record_top_and_timestamp();
+    retained_region->record_timestamp();
     // The retained region was added to the old region set when it was
     // retired. We have to remove it now, since we don't allow regions
     // we allocate to in the region sets. We'll re-add it later, when
@@ -94,6 +94,9 @@
   // want either way so no reason to check explicitly for either
   // condition.
   _retained_old_gc_alloc_region = old_gc_alloc_region(context)->release();
+  if (_retained_old_gc_alloc_region != NULL) {
+    _retained_old_gc_alloc_region->record_retained_region();
+  }

   if (ResizePLAB) {
     _g1h->_survivor_plab_stats.adjust_desired_plab_sz(no_of_gc_workers);
@@ -110,15 +113,16 @@
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
   ParGCAllocBuffer(gclab_word_size), _retired(true) { }

-HeapWord* G1ParGCAllocator::allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) {
-  HeapWord* obj = NULL;
-  size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
+HeapWord* G1ParGCAllocator::allocate_direct_or_new_plab(InCSetState dest,
+                                                        size_t word_sz,
+                                                        AllocationContext_t context) {
+  size_t gclab_word_size = _g1h->desired_plab_sz(dest);
   if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
-    G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose, context);
+    G1ParGCAllocBuffer* alloc_buf = alloc_buffer(dest, context);
     add_to_alloc_buffer_waste(alloc_buf->words_remaining());
     alloc_buf->retire(false /* end_of_gc */, false /* retain */);

-    HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size, context);
+    HeapWord* buf = _g1h->par_allocate_during_gc(dest, gclab_word_size, context);
     if (buf == NULL) {
       return NULL; // Let caller handle allocation failure.
     }
@@ -126,30 +130,33 @@
     alloc_buf->set_word_size(gclab_word_size);
     alloc_buf->set_buf(buf);

-    obj = alloc_buf->allocate(word_sz);
+    HeapWord* const obj = alloc_buf->allocate(word_sz);
     assert(obj != NULL, "buffer was definitely big enough...");
+    return obj;
   } else {
-    obj = _g1h->par_allocate_during_gc(purpose, word_sz, context);
+    return _g1h->par_allocate_during_gc(dest, word_sz, context);
   }
-  return obj;
 }

 G1DefaultParGCAllocator::G1DefaultParGCAllocator(G1CollectedHeap* g1h) :
-            G1ParGCAllocator(g1h),
-            _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
-            _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)) {
-
-  _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer;
-  _alloc_buffers[GCAllocForTenured]  = &_tenured_alloc_buffer;
-
+  G1ParGCAllocator(g1h),
+  _surviving_alloc_buffer(g1h->desired_plab_sz(InCSetState::Young)),
+  _tenured_alloc_buffer(g1h->desired_plab_sz(InCSetState::Old)) {
+  for (uint state = 0; state < InCSetState::Num; state++) {
+    _alloc_buffers[state] = NULL;
+  }
+  _alloc_buffers[InCSetState::Young] = &_surviving_alloc_buffer;
+  _alloc_buffers[InCSetState::Old]  = &_tenured_alloc_buffer;
 }

 void G1DefaultParGCAllocator::retire_alloc_buffers() {
-  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
-    size_t waste = _alloc_buffers[ap]->words_remaining();
-    add_to_alloc_buffer_waste(waste);
-    _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap),
-                                               true /* end_of_gc */,
-                                               false /* retain */);
+  for (uint state = 0; state < InCSetState::Num; state++) {
+    G1ParGCAllocBuffer* const buf = _alloc_buffers[state];
+    if (buf != NULL) {
+      add_to_alloc_buffer_waste(buf->words_remaining());
+      buf->flush_stats_and_retire(_g1h->alloc_buffer_stats(state),
+                                  true /* end_of_gc */,
+                                  false /* retain */);
+    }
   }
 }
--- a/src/share/vm/gc_implementation/g1/g1Allocator.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1Allocator.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -27,14 +27,9 @@

 #include "gc_implementation/g1/g1AllocationContext.hpp"
 #include "gc_implementation/g1/g1AllocRegion.hpp"
+#include "gc_implementation/g1/g1InCSetState.hpp"
 #include "gc_implementation/shared/parGCAllocBuffer.hpp"

-enum GCAllocPurpose {
-  GCAllocForTenured,
-  GCAllocForSurvived,
-  GCAllocPurposeCount
-};
-
 // Base class for G1 allocators.
 class G1Allocator : public CHeapObj<mtGC> {
   friend class VMStructs;
@@ -178,20 +173,40 @@
 protected:
   G1CollectedHeap* _g1h;

+  // The survivor alignment in effect in bytes.
+  // == 0 : don't align survivors
+  // != 0 : align survivors to that alignment
+  // These values were chosen to favor the non-alignment case since some
+  // architectures have a special compare against zero instructions.
+  const uint _survivor_alignment_bytes;
+
   size_t _alloc_buffer_waste;
   size_t _undo_waste;

   void add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
   void add_to_undo_waste(size_t waste)         { _undo_waste += waste; }

-  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context);
+  virtual void retire_alloc_buffers() = 0;
+  virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0;

-  virtual void retire_alloc_buffers() = 0;
-  virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) = 0;
+  // Calculate the survivor space object alignment in bytes. Returns that or 0 if
+  // there are no restrictions on survivor alignment.
+  static uint calc_survivor_alignment_bytes() {
+    assert(SurvivorAlignmentInBytes >= ObjectAlignmentInBytes, "sanity");
+    if (SurvivorAlignmentInBytes == ObjectAlignmentInBytes) {
+      // No need to align objects in the survivors differently, return 0
+      // which means "survivor alignment is not used".
+      return 0;
+    } else {
+      assert(SurvivorAlignmentInBytes > 0, "sanity");
+      return SurvivorAlignmentInBytes;
+    }
+  }

 public:
   G1ParGCAllocator(G1CollectedHeap* g1h) :
-    _g1h(g1h), _alloc_buffer_waste(0), _undo_waste(0) {
+    _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()),
+    _alloc_buffer_waste(0), _undo_waste(0) {
   }

   static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h);
@@ -199,24 +214,40 @@
   size_t alloc_buffer_waste() { return _alloc_buffer_waste; }
   size_t undo_waste() {return _undo_waste; }

-  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz, AllocationContext_t context) {
-    HeapWord* obj = NULL;
-    if (purpose == GCAllocForSurvived) {
-      obj = alloc_buffer(purpose, context)->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
+  // Allocate word_sz words in dest, either directly into the regions or by
+  // allocating a new PLAB. Returns the address of the allocated memory, NULL if
+  // not successful.
+  HeapWord* allocate_direct_or_new_plab(InCSetState dest,
+                                        size_t word_sz,
+                                        AllocationContext_t context);
+
+  // Allocate word_sz words in the PLAB of dest.  Returns the address of the
+  // allocated memory, NULL if not successful.
+  HeapWord* plab_allocate(InCSetState dest,
+                          size_t word_sz,
+                          AllocationContext_t context) {
+    G1ParGCAllocBuffer* buffer = alloc_buffer(dest, context);
+    if (_survivor_alignment_bytes == 0) {
+      return buffer->allocate(word_sz);
     } else {
-      obj = alloc_buffer(purpose, context)->allocate(word_sz);
+      return buffer->allocate_aligned(word_sz, _survivor_alignment_bytes);
     }
+  }
+
+  HeapWord* allocate(InCSetState dest, size_t word_sz,
+                     AllocationContext_t context) {
+    HeapWord* const obj = plab_allocate(dest, word_sz, context);
     if (obj != NULL) {
       return obj;
     }
-    return allocate_slow(purpose, word_sz, context);
+    return allocate_direct_or_new_plab(dest, word_sz, context);
   }

-  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz, AllocationContext_t context) {
-    if (alloc_buffer(purpose, context)->contains(obj)) {
-      assert(alloc_buffer(purpose, context)->contains(obj + word_sz - 1),
+  void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) {
+    if (alloc_buffer(dest, context)->contains(obj)) {
+      assert(alloc_buffer(dest, context)->contains(obj + word_sz - 1),
              "should contain whole object");
-      alloc_buffer(purpose, context)->undo_allocation(obj, word_sz);
+      alloc_buffer(dest, context)->undo_allocation(obj, word_sz);
     } else {
       CollectedHeap::fill_with_object(obj, word_sz);
       add_to_undo_waste(word_sz);
@@ -227,13 +258,17 @@
 class G1DefaultParGCAllocator : public G1ParGCAllocator {
   G1ParGCAllocBuffer  _surviving_alloc_buffer;
   G1ParGCAllocBuffer  _tenured_alloc_buffer;
-  G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount];
+  G1ParGCAllocBuffer* _alloc_buffers[InCSetState::Num];

 public:
   G1DefaultParGCAllocator(G1CollectedHeap* g1h);

-  virtual G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose, AllocationContext_t context) {
-    return _alloc_buffers[purpose];
+  virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) {
+    assert(dest.is_valid(),
+           err_msg("Allocation buffer index out-of-bounds: " CSETSTATE_FORMAT, dest.value()));
+    assert(_alloc_buffers[dest.value()] != NULL,
+           err_msg("Allocation buffer is NULL: " CSETSTATE_FORMAT, dest.value()));
+    return _alloc_buffers[dest.value()];
   }

   virtual void retire_alloc_buffers() ;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -46,6 +46,7 @@
 #include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
 #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/g1RootProcessor.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegion.inline.hpp"
@@ -85,18 +86,6 @@
 // apply to TLAB allocation, which is not part of this interface: it
 // is done by clients of this interface.)

-// Notes on implementation of parallelism in different tasks.
-//
-// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
-// The number of GC workers is passed to heap_region_par_iterate_chunked().
-// It does use run_task() which sets _n_workers in the task.
-// G1ParTask executes g1_process_roots() ->
-// SharedHeap::process_roots() which calls eventually to
-// CardTableModRefBS::par_non_clean_card_iterate_work() which uses
-// SequentialSubTasksDone.  SharedHeap::process_roots() also
-// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
-//
-
 // Local to this file.

 class RefineCardTableEntryClosure: public CardTableEntryClosure {
@@ -1854,7 +1843,6 @@
   _is_alive_closure_stw(this),
   _ref_processor_cm(NULL),
   _ref_processor_stw(NULL),
-  _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   _bot_shared(NULL),
   _evac_failure_scan_stack(NULL),
   _mark_in_progress(false),
@@ -1877,6 +1865,7 @@
   _old_marking_cycles_started(0),
   _old_marking_cycles_completed(0),
   _concurrent_cycle_started(false),
+  _heap_summary_sent(false),
   _in_cset_fast_test(),
   _dirty_cards_region_list(NULL),
   _worker_cset_start_region(NULL),
@@ -1887,9 +1876,6 @@
   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()) {

   _g1h = this;
-  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
-    vm_exit_during_initialization("Failed necessary allocation.");
-  }

   _allocator = G1Allocator::create_allocator(_g1h);
   _humongous_object_threshold_in_words = HeapRegion::GrainWords / 2;
@@ -2298,11 +2284,11 @@
   hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq);

   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  int n_completed_buffers = 0;
+  size_t n_completed_buffers = 0;
   while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
     n_completed_buffers++;
   }
-  g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i, n_completed_buffers);
+  g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers);
   dcqs.clear_n_completed_buffers();
   assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
 }
@@ -2447,13 +2433,24 @@
     _gc_timer_cm->register_gc_end();
     _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());

+    // Clear state variables to prepare for the next concurrent cycle.
     _concurrent_cycle_started = false;
+    _heap_summary_sent = false;
   }
 }

 void G1CollectedHeap::trace_heap_after_concurrent_cycle() {
   if (_concurrent_cycle_started) {
-    trace_heap_after_gc(_gc_tracer_cm);
+    // This function can be called when:
+    //  the cleanup pause is run
+    //  the concurrent cycle is aborted before the cleanup pause.
+    //  the concurrent cycle is aborted after the cleanup pause,
+    //   but before the concurrent cycle end has been registered.
+    // Make sure that we only send the heap information once.
+    if (!_heap_summary_sent) {
+      trace_heap_after_gc(_gc_tracer_cm);
+      _heap_summary_sent = true;
+    }
   }
 }

@@ -3291,11 +3288,12 @@
     G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
     G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);

-    process_all_roots(true,            // activate StrongRootsScope
-                      SO_AllCodeCache, // roots scanning options
-                      &rootsCl,
-                      &cldCl,
-                      &blobsCl);
+    {
+      G1RootProcessor root_processor(this);
+      root_processor.process_all_roots(&rootsCl,
+                                       &cldCl,
+                                       &blobsCl);
+    }

     bool failures = rootsCl.failures() || codeRootsCl.failures();

@@ -3720,7 +3718,7 @@
                                                                   cl.candidate_humongous());
   _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;

-  if (_has_humongous_reclaim_candidates) {
+  if (_has_humongous_reclaim_candidates || G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
     clear_humongous_is_live_table();
   }
 }
@@ -3900,10 +3898,10 @@

     TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);

-    int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+    uint active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
                                 workers()->active_workers() : 1);
     double pause_start_sec = os::elapsedTime();
-    g1_policy()->phase_times()->note_gc_start(active_workers);
+    g1_policy()->phase_times()->note_gc_start(active_workers, mark_in_progress());
     log_gc_header();

     TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
@@ -4013,6 +4011,8 @@

         register_humongous_regions_with_in_cset_fast_test();

+        assert(check_cset_fast_test(), "Inconsistency in the InCSetState table.");
+
         _cm->note_start_of_gc();
         // We should not verify the per-thread SATB buffers given that
         // we have not filtered them yet (we'll do so during the
@@ -4242,29 +4242,6 @@
   return true;
 }

-size_t G1CollectedHeap::desired_plab_sz(GCAllocPurpose purpose)
-{
-  size_t gclab_word_size;
-  switch (purpose) {
-    case GCAllocForSurvived:
-      gclab_word_size = _survivor_plab_stats.desired_plab_sz();
-      break;
-    case GCAllocForTenured:
-      gclab_word_size = _old_plab_stats.desired_plab_sz();
-      break;
-    default:
-      assert(false, "unknown GCAllocPurpose");
-      gclab_word_size = _old_plab_stats.desired_plab_sz();
-      break;
-  }
-
-  // Prevent humongous PLAB sizes for two reasons:
-  // * PLABs are allocated using a similar paths as oops, but should
-  //   never be in a humongous region
-  // * Allowing humongous PLABs needlessly churns the region free lists
-  return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
-}
-
 void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
   _drain_in_progress = false;
   set_evac_failure_closure(cl);
@@ -4404,35 +4381,6 @@
   }
 }

-HeapWord* G1CollectedHeap::par_allocate_during_gc(GCAllocPurpose purpose,
-                                                  size_t word_size,
-                                                  AllocationContext_t context) {
-  if (purpose == GCAllocForSurvived) {
-    HeapWord* result = survivor_attempt_allocation(word_size, context);
-    if (result != NULL) {
-      return result;
-    } else {
-      // Let's try to allocate in the old gen in case we can fit the
-      // object there.
-      return old_attempt_allocation(word_size, context);
-    }
-  } else {
-    assert(purpose ==  GCAllocForTenured, "sanity");
-    HeapWord* result = old_attempt_allocation(word_size, context);
-    if (result != NULL) {
-      return result;
-    } else {
-      // Let's try to allocate in the survivors in case we can fit the
-      // object there.
-      return survivor_attempt_allocation(word_size, context);
-    }
-  }
-
-  ShouldNotReachHere();
-  // Trying to keep some compilers happy.
-  return NULL;
-}
-
 void G1ParCopyHelper::mark_object(oop obj) {
   assert(!_g1->heap_region_containing(obj)->in_collection_set(), "should not mark objects in the CSet");

@@ -4475,14 +4423,14 @@

   assert(_worker_id == _par_scan_state->queue_num(), "sanity");

-  G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
-
-  if (state == G1CollectedHeap::InCSet) {
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
     oop forwardee;
-    if (obj->is_forwarded()) {
-      forwardee = obj->forwardee();
+    markOop m = obj->mark();
+    if (m->is_marked()) {
+      forwardee = (oop) m->decode_pointer();
     } else {
-      forwardee = _par_scan_state->copy_to_survivor_space(obj);
+      forwardee = _par_scan_state->copy_to_survivor_space(state, obj, m);
     }
     assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
@@ -4496,7 +4444,7 @@
       do_klass_barrier(p, forwardee);
     }
   } else {
-    if (state == G1CollectedHeap::IsHumongous) {
+    if (state.is_humongous()) {
       _g1->set_humongous_is_live(obj);
     }
     // The object is not in collection set. If we're a root scanning
@@ -4581,60 +4529,11 @@
   }
 };

-class G1CodeBlobClosure : public CodeBlobClosure {
-  class HeapRegionGatheringOopClosure : public OopClosure {
-    G1CollectedHeap* _g1h;
-    OopClosure* _work;
-    nmethod* _nm;
-
-    template <typename T>
-    void do_oop_work(T* p) {
-      _work->do_oop(p);
-      T oop_or_narrowoop = oopDesc::load_heap_oop(p);
-      if (!oopDesc::is_null(oop_or_narrowoop)) {
-        oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop);
-        HeapRegion* hr = _g1h->heap_region_containing_raw(o);
-        assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset");
-        hr->add_strong_code_root(_nm);
-      }
-    }
-
-  public:
-    HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {}
-
-    void do_oop(oop* o) {
-      do_oop_work(o);
-    }
-
-    void do_oop(narrowOop* o) {
-      do_oop_work(o);
-    }
-
-    void set_nm(nmethod* nm) {
-      _nm = nm;
-    }
-  };
-
-  HeapRegionGatheringOopClosure _oc;
-public:
-  G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
-
-  void do_code_blob(CodeBlob* cb) {
-    nmethod* nm = cb->as_nmethod_or_null();
-    if (nm != NULL) {
-      if (!nm->test_set_oops_do_mark()) {
-        _oc.set_nm(nm);
-        nm->oops_do(&_oc);
-        nm->fix_oop_relocations();
-      }
-    }
-  }
-};
-
 class G1ParTask : public AbstractGangTask {
 protected:
   G1CollectedHeap*       _g1h;
   RefToScanQueueSet      *_queues;
+  G1RootProcessor*       _root_processor;
   ParallelTaskTerminator _terminator;
   uint _n_workers;

@@ -4642,10 +4541,11 @@
   Mutex* stats_lock() { return &_stats_lock; }

 public:
-  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues)
+  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
+      _root_processor(root_processor),
       _terminator(0, _queues),
       _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   {}
@@ -4659,13 +4559,7 @@
   ParallelTaskTerminator* terminator() { return &_terminator; }

   virtual void set_for_termination(int active_workers) {
-    // This task calls set_n_termination() in par_non_clean_card_iterate_work()
-    // in the young space (_par_seq_tasks) in the G1 heap
-    // for SequentialSubTasksDone.
-    // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap
-    // both of which need setting by set_n_termination().
-    _g1h->SharedHeap::set_n_termination(active_workers);
-    _g1h->set_n_termination(active_workers);
+    _root_processor->set_num_workers(active_workers);
     terminator()->reset_for_reuse(active_workers);
     _n_workers = active_workers;
   }
@@ -4702,8 +4596,7 @@
   void work(uint worker_id) {
     if (worker_id >= _n_workers) return;  // no work needed this round

-    double start_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms);
+    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());

     {
       ResourceMark rm;
@@ -4735,24 +4628,21 @@
                                                                                     false, // Process all klasses.
                                                                                     true); // Need to claim CLDs.

-      G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl);
-      G1CodeBlobClosure scan_mark_code_cl(&scan_mark_root_cl);
-      // IM Weak code roots are handled later.
-
       OopClosure* strong_root_cl;
       OopClosure* weak_root_cl;
       CLDClosure* strong_cld_cl;
       CLDClosure* weak_cld_cl;
-      CodeBlobClosure* strong_code_cl;
+
+      bool trace_metadata = false;

       if (_g1h->g1_policy()->during_initial_mark_pause()) {
         // We also need to mark copied objects.
         strong_root_cl = &scan_mark_root_cl;
         strong_cld_cl  = &scan_mark_cld_cl;
-        strong_code_cl = &scan_mark_code_cl;
         if (ClassUnloadingWithConcurrentMark) {
           weak_root_cl = &scan_mark_weak_root_cl;
           weak_cld_cl  = &scan_mark_weak_cld_cl;
+          trace_metadata = true;
         } else {
           weak_root_cl = &scan_mark_root_cl;
           weak_cld_cl  = &scan_mark_cld_cl;
@@ -4762,31 +4652,32 @@
         weak_root_cl   = &scan_only_root_cl;
         strong_cld_cl  = &scan_only_cld_cl;
         weak_cld_cl    = &scan_only_cld_cl;
-        strong_code_cl = &scan_only_code_cl;
       }

-
-      G1ParPushHeapRSClosure  push_heap_rs_cl(_g1h, &pss);
-
       pss.start_strong_roots();
-      _g1h->g1_process_roots(strong_root_cl,
-                             weak_root_cl,
-                             &push_heap_rs_cl,
-                             strong_cld_cl,
-                             weak_cld_cl,
-                             strong_code_cl,
-                             worker_id);
-
+
+      _root_processor->evacuate_roots(strong_root_cl,
+                                      weak_root_cl,
+                                      strong_cld_cl,
+                                      weak_cld_cl,
+                                      trace_metadata,
+                                      worker_id);
+
+      G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss);
+      _root_processor->scan_remembered_sets(&push_heap_rs_cl,
+                                            weak_root_cl,
+                                            worker_id);
       pss.end_strong_roots();

       {
         double start = os::elapsedTime();
         G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
         evac.do_void();
-        double elapsed_ms = (os::elapsedTime()-start)*1000.0;
-        double term_ms = pss.term_time()*1000.0;
-        _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms);
-        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts());
+        double elapsed_sec = os::elapsedTime() - start;
+        double term_sec = pss.term_time();
+        _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec);
+        _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec);
+        _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts());
       }
       _g1h->g1_policy()->record_thread_age_table(pss.age_table());
       _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
@@ -4802,100 +4693,10 @@
       // destructors are executed here and are included as part of the
       // "GC Worker Time".
     }
-
-    double end_time_ms = os::elapsedTime() * 1000.0;
-    _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms);
+    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime());
   }
 };

-// *** Common G1 Evacuation Stuff
-
-// This method is run in a GC worker.
-
-void
-G1CollectedHeap::
-g1_process_roots(OopClosure* scan_non_heap_roots,
-                 OopClosure* scan_non_heap_weak_roots,
-                 OopsInHeapRegionClosure* scan_rs,
-                 CLDClosure* scan_strong_clds,
-                 CLDClosure* scan_weak_clds,
-                 CodeBlobClosure* scan_strong_code,
-                 uint worker_i) {
-
-  // First scan the shared roots.
-  double ext_roots_start = os::elapsedTime();
-  double closure_app_time_sec = 0.0;
-
-  bool during_im = _g1h->g1_policy()->during_initial_mark_pause();
-  bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark;
-
-  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
-  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
-
-  process_roots(false, // no scoping; this is parallel code
-                SharedHeap::SO_None,
-                &buf_scan_non_heap_roots,
-                &buf_scan_non_heap_weak_roots,
-                scan_strong_clds,
-                // Unloading Initial Marks handle the weak CLDs separately.
-                (trace_metadata ? NULL : scan_weak_clds),
-                scan_strong_code);
-
-  // Now the CM ref_processor roots.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
-    // We need to treat the discovered reference lists of the
-    // concurrent mark ref processor as roots and keep entries
-    // (which are added by the marking threads) on them live
-    // until they can be processed at the end of marking.
-    ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
-  }
-
-  if (trace_metadata) {
-    // Barrier to make sure all workers passed
-    // the strong CLD and strong nmethods phases.
-    active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads());
-
-    // Now take the complement of the strong CLDs.
-    ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
-  }
-
-  // Finish up any enqueued closure apps (attributed as object copy time).
-  buf_scan_non_heap_roots.done();
-  buf_scan_non_heap_weak_roots.done();
-
-  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
-      + buf_scan_non_heap_weak_roots.closure_app_seconds();
-
-  g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
-
-  double ext_root_time_ms =
-    ((os::elapsedTime() - ext_roots_start) - obj_copy_time_sec) * 1000.0;
-
-  g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
-
-  // During conc marking we have to filter the per-thread SATB buffers
-  // to make sure we remove any oops into the CSet (which will show up
-  // as implicitly live).
-  double satb_filtering_ms = 0.0;
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
-    if (mark_in_progress()) {
-      double satb_filter_start = os::elapsedTime();
-
-      JavaThread::satb_mark_queue_set().filter_thread_buffers();
-
-      satb_filtering_ms = (os::elapsedTime() - satb_filter_start) * 1000.0;
-    }
-  }
-  g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
-
-  // Now scan the complement of the collection set.
-  G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
-
-  g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
-
-  _process_strong_tasks->all_tasks_completed();
-}
-
 class G1StringSymbolTableUnlinkTask : public AbstractGangTask {
 private:
   BoolObjectClosure* _is_alive;
@@ -5309,7 +5110,8 @@
   G1RedirtyLoggedCardsTask(DirtyCardQueueSet* queue) : AbstractGangTask("Redirty Cards"), _queue(queue) { }

   virtual void work(uint worker_id) {
-    double start_time = os::elapsedTime();
+    G1GCPhaseTimes* phase_times = G1CollectedHeap::heap()->g1_policy()->phase_times();
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::RedirtyCards, worker_id);

     RedirtyLoggedCardTableEntryClosure cl;
     if (G1CollectedHeap::heap()->use_parallel_gc_threads()) {
@@ -5318,9 +5120,7 @@
       _queue->apply_closure_to_all_completed_buffers(&cl);
     }

-    G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times();
-    timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0);
-    timer->record_redirty_logged_cards_processed_cards(worker_id, cl.num_processed());
+    phase_times->record_thread_work_item(G1GCPhaseTimes::RedirtyCards, worker_id, cl.num_processed());
   }
 };

@@ -5381,17 +5181,17 @@
     oop obj = *p;
     assert(obj != NULL, "the caller should have filtered out NULL values");

-    G1CollectedHeap::in_cset_state_t cset_state = _g1->in_cset_state(obj);
-    if (cset_state == G1CollectedHeap::InNeither) {
+    const InCSetState cset_state = _g1->in_cset_state(obj);
+    if (!cset_state.is_in_cset_or_humongous()) {
       return;
     }
-    if (cset_state == G1CollectedHeap::InCSet) {
+    if (cset_state.is_in_cset()) {
       assert( obj->is_forwarded(), "invariant" );
       *p = obj->forwardee();
     } else {
       assert(!obj->is_forwarded(), "invariant" );
-      assert(cset_state == G1CollectedHeap::IsHumongous,
-             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state));
+      assert(cset_state.is_humongous(),
+             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state.value()));
       _g1->set_humongous_is_live(obj);
     }
   }
@@ -5884,7 +5684,6 @@
     n_workers = 1;
   }

-  G1ParTask g1_par_task(this, _task_queues);

   init_for_evac_failure(NULL);

@@ -5895,7 +5694,8 @@
   double end_par_time_sec;

   {
-    StrongRootsScope srs(this);
+    G1RootProcessor root_processor(this);
+    G1ParTask g1_par_task(this, _task_queues, &root_processor);
     // InitialMark needs claim bits to keep track of the marked-through CLDs.
     if (g1_policy()->during_initial_mark_pause()) {
       ClassLoaderDataGraph::clear_claimed_marks();
@@ -5916,18 +5716,20 @@
     end_par_time_sec = os::elapsedTime();

     // Closing the inner scope will execute the destructor
-    // for the StrongRootsScope object. We record the current
+    // for the G1RootProcessor object. We record the current
     // elapsed time before closing the scope so that time
-    // taken for the SRS destructor is NOT included in the
+    // taken for the destructor is NOT included in the
     // reported parallel time.
   }

+  G1GCPhaseTimes* phase_times = g1_policy()->phase_times();
+
   double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
-  g1_policy()->phase_times()->record_par_time(par_time_ms);
+  phase_times->record_par_time(par_time_ms);

   double code_root_fixup_time_ms =
         (os::elapsedTime() - end_par_time_sec) * 1000.0;
-  g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms);
+  phase_times->record_code_root_fixup_time(code_root_fixup_time_ms);

   set_par_threads(0);

@@ -5938,14 +5740,15 @@
   // not copied during the pause.
   process_discovered_references(n_workers);

-  // Weak root processing.
-  {
+  if (G1StringDedup::is_enabled()) {
+    double fixup_start = os::elapsedTime();
+
     G1STWIsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
-    JNIHandles::weak_oops_do(&is_alive, &keep_alive);
-    if (G1StringDedup::is_enabled()) {
-      G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive);
-    }
+    G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive, true, phase_times);
+
+    double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
+    phase_times->record_string_dedup_fixup_time(fixup_time_ms);
   }

   _allocator->release_gc_alloc_regions(n_workers, evacuation_info);
@@ -6213,6 +6016,58 @@
   heap_region_iterate(&cl);
   guarantee(!cl.failures(), "bitmap verification");
 }
+
+bool G1CollectedHeap::check_cset_fast_test() {
+  bool failures = false;
+  for (uint i = 0; i < _hrm.length(); i += 1) {
+    HeapRegion* hr = _hrm.at(i);
+    InCSetState cset_state = (InCSetState) _in_cset_fast_test.get_by_index((uint) i);
+    if (hr->isHumongous()) {
+      if (hr->in_collection_set()) {
+        gclog_or_tty->print_cr("\n## humongous region %u in CSet", i);
+        failures = true;
+        break;
+      }
+      if (cset_state.is_in_cset()) {
+        gclog_or_tty->print_cr("\n## inconsistent cset state %d for humongous region %u", cset_state.value(), i);
+        failures = true;
+        break;
+      }
+      if (hr->continuesHumongous() && cset_state.is_humongous()) {
+        gclog_or_tty->print_cr("\n## inconsistent cset state %d for continues humongous region %u", cset_state.value(), i);
+        failures = true;
+        break;
+      }
+    } else {
+      if (cset_state.is_humongous()) {
+        gclog_or_tty->print_cr("\n## inconsistent cset state %d for non-humongous region %u", cset_state.value(), i);
+        failures = true;
+        break;
+      }
+      if (hr->in_collection_set() != cset_state.is_in_cset()) {
+        gclog_or_tty->print_cr("\n## in CSet %d / cset state %d inconsistency for region %u",
+                               hr->in_collection_set(), cset_state.value(), i);
+        failures = true;
+        break;
+      }
+      if (cset_state.is_in_cset()) {
+        if (hr->is_young() != (cset_state.is_young())) {
+          gclog_or_tty->print_cr("\n## is_young %d / cset state %d inconsistency for region %u",
+                                 hr->is_young(), cset_state.value(), i);
+          failures = true;
+          break;
+        }
+        if (hr->is_old() != (cset_state.is_old())) {
+          gclog_or_tty->print_cr("\n## is_old %d / cset state %d inconsistency for region %u",
+                                 hr->is_old(), cset_state.value(), i);
+          failures = true;
+          break;
+        }
+      }
+    }
+  }
+  return !failures;
+}
 #endif // PRODUCT

 void G1CollectedHeap::cleanUpCardTable() {
@@ -6420,9 +6275,10 @@
         g1h->humongous_region_is_always_live(region_idx)) {

       if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
-        gclog_or_tty->print_cr("Live humongous %d region %d with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+        gclog_or_tty->print_cr("Live humongous %d region %d size "SIZE_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
                                r->isHumongous(),
                                region_idx,
+                               obj->size()*HeapWordSize,
                                r->rem_set()->occupied(),
                                r->rem_set()->strong_code_roots_list_length(),
                                next_bitmap->isMarked(r->bottom()),
@@ -6439,8 +6295,9 @@
                       r->bottom()));

     if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
-      gclog_or_tty->print_cr("Reclaim humongous region %d start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+      gclog_or_tty->print_cr("Reclaim humongous region %d size "SIZE_FORMAT" start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other ",
                              r->isHumongous(),
+                             obj->size()*HeapWordSize,
                              r->bottom(),
                              region_idx,
                              r->region_num(),
@@ -6479,7 +6336,8 @@
 void G1CollectedHeap::eagerly_reclaim_humongous_regions() {
   assert_at_safepoint(true);

-  if (!G1ReclaimDeadHumongousObjectsAtYoungGC || !_has_humongous_reclaim_candidates) {
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC ||
+      (!_has_humongous_reclaim_candidates && !G1TraceReclaimDeadHumongousObjectsAtYoungGC)) {
     g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms(0.0, 0);
     return;
   }
@@ -6792,20 +6650,20 @@

 HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size,
                                                  uint count,
-                                                 GCAllocPurpose ap) {
+                                                 InCSetState dest) {
   assert(FreeList_lock->owned_by_self(), "pre-condition");

-  if (count < g1_policy()->max_regions(ap)) {
-    bool survivor = (ap == GCAllocForSurvived);
+  if (count < g1_policy()->max_regions(dest)) {
+    const bool is_survivor = (dest.is_young());
     HeapRegion* new_alloc_region = new_region(word_size,
-                                              !survivor,
+                                              !is_survivor,
                                               true /* do_expand */);
     if (new_alloc_region != NULL) {
       // We really only need to do this for old regions given that we
       // should never scan survivors. But it doesn't hurt to do it
       // for survivors too.
-      new_alloc_region->record_top_and_timestamp();
-      if (survivor) {
+      new_alloc_region->record_timestamp();
+      if (is_survivor) {
         new_alloc_region->set_survivor();
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor);
         check_bitmaps("Survivor Region Allocation", new_alloc_region);
@@ -6817,8 +6675,6 @@
       bool during_im = g1_policy()->during_initial_mark_pause();
       new_alloc_region->note_start_of_copying(during_im);
       return new_alloc_region;
-    } else {
-      g1_policy()->note_alloc_region_limit_reached(ap);
     }
   }
   return NULL;
@@ -6826,11 +6682,11 @@

 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
-                                             GCAllocPurpose ap) {
+                                             InCSetState dest) {
   bool during_im = g1_policy()->during_initial_mark_pause();
   alloc_region->note_end_of_copying(during_im);
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
-  if (ap == GCAllocForSurvived) {
+  if (dest.is_young()) {
     young_list()->add_survivor_region(alloc_region);
   } else {
     _old_set.add(alloc_region);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -32,6 +32,7 @@
 #include "gc_implementation/g1/g1AllocRegion.hpp"
 #include "gc_implementation/g1/g1BiasedArray.hpp"
 #include "gc_implementation/g1/g1HRPrinter.hpp"
+#include "gc_implementation/g1/g1InCSetState.hpp"
 #include "gc_implementation/g1/g1MonitoringSupport.hpp"
 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
@@ -346,6 +347,7 @@
   volatile unsigned int _old_marking_cycles_completed;

   bool _concurrent_cycle_started;
+  bool _heap_summary_sent;

   // This is a non-product method that is helpful for testing. It is
   // called at the end of a GC and artificially expands the heap by
@@ -545,15 +547,9 @@
   // allocation region, either by picking one or expanding the
   // heap, and then allocate a block of the given size. The block
   // may not be a humongous - it must fit into a single heap region.
-  HeapWord* par_allocate_during_gc(GCAllocPurpose purpose,
-                                   size_t word_size,
-                                   AllocationContext_t context);
-
-  HeapWord* allocate_during_gc_slow(GCAllocPurpose purpose,
-                                    HeapRegion*    alloc_region,
-                                    bool           par,
-                                    size_t         word_size);
-
+  inline HeapWord* par_allocate_during_gc(InCSetState dest,
+                                          size_t word_size,
+                                          AllocationContext_t context);
   // Ensure that no further allocations can happen in "r", bearing in mind
   // that parallel threads might be attempting allocations.
   void par_allocate_remaining_space(HeapRegion* r);
@@ -575,9 +571,9 @@

   // For GC alloc regions.
   HeapRegion* new_gc_alloc_region(size_t word_size, uint count,
-                                  GCAllocPurpose ap);
+                                  InCSetState dest);
   void retire_gc_alloc_region(HeapRegion* alloc_region,
-                              size_t allocated_bytes, GCAllocPurpose ap);
+                              size_t allocated_bytes, InCSetState dest);

   // - if explicit_gc is true, the GC is for a System.gc() or a heap
   //   inspection request and should collect the entire heap
@@ -638,26 +634,11 @@
   // (Rounds up to a HeapRegion boundary.)
   bool expand(size_t expand_bytes);

-  // Returns the PLAB statistics given a purpose.
-  PLABStats* stats_for_purpose(GCAllocPurpose purpose) {
-    PLABStats* stats = NULL;
+  // Returns the PLAB statistics for a given destination.
+  inline PLABStats* alloc_buffer_stats(InCSetState dest);

-    switch (purpose) {
-    case GCAllocForSurvived:
-      stats = &_survivor_plab_stats;
-      break;
-    case GCAllocForTenured:
-      stats = &_old_plab_stats;
-      break;
-    default:
-      assert(false, "unrecognized GCAllocPurpose");
-    }
-
-    return stats;
-  }
-
-  // Determines PLAB size for a particular allocation purpose.
-  size_t desired_plab_sz(GCAllocPurpose purpose);
+  // Determines PLAB size for a given destination.
+  inline size_t desired_plab_sz(InCSetState dest);

   inline AllocationContextStats& allocation_context_stats();

@@ -681,8 +662,11 @@
   void register_humongous_regions_with_in_cset_fast_test();
   // We register a region with the fast "in collection set" test. We
   // simply set to true the array slot corresponding to this region.
-  void register_region_with_in_cset_fast_test(HeapRegion* r) {
-    _in_cset_fast_test.set_in_cset(r->hrm_index());
+  void register_young_region_with_in_cset_fast_test(HeapRegion* r) {
+    _in_cset_fast_test.set_in_young(r->hrm_index());
+  }
+  void register_old_region_with_in_cset_fast_test(HeapRegion* r) {
+    _in_cset_fast_test.set_in_old(r->hrm_index());
   }

   // This is a fast test on whether a reference points into the
@@ -812,22 +796,6 @@
   // statistics or updating free lists.
   void abandon_collection_set(HeapRegion* cs_head);

-  // Applies "scan_non_heap_roots" to roots outside the heap,
-  // "scan_rs" to roots inside the heap (having done "set_region" to
-  // indicate the region in which the root resides),
-  // and does "scan_metadata" If "scan_rs" is
-  // NULL, then this step is skipped.  The "worker_i"
-  // param is for use with parallel roots processing, and should be
-  // the "i" of the calling parallel worker thread's work(i) function.
-  // In the sequential case this param will be ignored.
-  void g1_process_roots(OopClosure* scan_non_heap_roots,
-                        OopClosure* scan_non_heap_weak_roots,
-                        OopsInHeapRegionClosure* scan_rs,
-                        CLDClosure* scan_strong_clds,
-                        CLDClosure* scan_weak_clds,
-                        CodeBlobClosure* scan_strong_code,
-                        uint worker_i);
-
   // The concurrent marker (and the thread it runs in.)
   ConcurrentMark* _cm;
   ConcurrentMarkThread* _cmThread;
@@ -1014,21 +982,10 @@
   // of G1CollectedHeap::_gc_time_stamp.
   unsigned int* _worker_cset_start_region_time_stamp;

-  enum G1H_process_roots_tasks {
-    G1H_PS_filter_satb_buffers,
-    G1H_PS_refProcessor_oops_do,
-    // Leave this one last.
-    G1H_PS_NumElements
-  };
-
-  SubTasksDone* _process_strong_tasks;
-
   volatile bool _free_regions_coming;

 public:

-  SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
-
   void set_refine_cte_cl_concurrency(bool concurrent);

   RefToScanQueue *task_queue(int i) const;
@@ -1061,21 +1018,11 @@
   // Initialize weak reference processing.
   virtual void ref_processing_init();

-  void set_par_threads(uint t) {
-    SharedHeap::set_par_threads(t);
-    // Done in SharedHeap but oddly there are
-    // two _process_strong_tasks's in a G1CollectedHeap
-    // so do it here too.
-    _process_strong_tasks->set_n_threads(t);
-  }
-
+  // Explicitly import set_par_threads into this scope
+  using SharedHeap::set_par_threads;
   // Set _n_par_threads according to a policy TBD.
   void set_par_threads();

-  void set_n_termination(int t) {
-    _process_strong_tasks->set_n_threads(t);
-  }
-
   virtual CollectedHeap::Name kind() const {
     return CollectedHeap::G1CollectedHeap;
   }
@@ -1182,6 +1129,9 @@
   // appropriate error messages and crash.
   void check_bitmaps(const char* caller) PRODUCT_RETURN;

+  // Do sanity check on the contents of the in-cset fast test table.
+  bool check_cset_fast_test() PRODUCT_RETURN_( return true; );
+
   // verify_region_sets() performs verification over the region
   // lists. It will be compiled in the product code to be used when
   // necessary (i.e., during heap verification).
@@ -1277,53 +1227,15 @@

   inline bool is_in_cset_or_humongous(const oop obj);

-  enum in_cset_state_t {
-   InNeither,           // neither in collection set nor humongous
-   InCSet,              // region is in collection set only
-   IsHumongous          // region is a humongous start region
-  };
  private:
-  // Instances of this class are used for quick tests on whether a reference points
-  // into the collection set or is a humongous object (points into a humongous
-  // object).
-  // Each of the array's elements denotes whether the corresponding region is in
-  // the collection set or a humongous region.
-  // We use this to quickly reclaim humongous objects: by making a humongous region
-  // succeed this test, we sort-of add it to the collection set. During the reference
-  // iteration closures, when we see a humongous region, we simply mark it as
-  // referenced, i.e. live.
-  class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> {
-   protected:
-    char default_value() const { return G1CollectedHeap::InNeither; }
-   public:
-    void set_humongous(uintptr_t index) {
-      assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values");
-      set_by_index(index, G1CollectedHeap::IsHumongous);
-    }
-
-    void clear_humongous(uintptr_t index) {
-      set_by_index(index, G1CollectedHeap::InNeither);
-    }
-
-    void set_in_cset(uintptr_t index) {
-      assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value");
-      set_by_index(index, G1CollectedHeap::InCSet);
-    }
-
-    bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; }
-    bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; }
-    G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); }
-    void clear() { G1BiasedMappedArray<char>::clear(); }
-  };
-
   // This array is used for a quick test on whether a reference points into
   // the collection set or not. Each of the array's elements denotes whether the
   // corresponding region is in the collection set or not.
-  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+  G1InCSetStateFastTestBiasedMappedArray _in_cset_fast_test;

  public:

-  inline in_cset_state_t in_cset_state(const oop obj);
+  inline InCSetState in_cset_state(const oop obj);

   // Return "TRUE" iff the given object address is in the reserved
   // region of g1.
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -35,6 +35,41 @@
 #include "runtime/orderAccess.inline.hpp"
 #include "utilities/taskqueue.hpp"

+PLABStats* G1CollectedHeap::alloc_buffer_stats(InCSetState dest) {
+  switch (dest.value()) {
+    case InCSetState::Young:
+      return &_survivor_plab_stats;
+    case InCSetState::Old:
+      return &_old_plab_stats;
+    default:
+      ShouldNotReachHere();
+      return NULL; // Keep some compilers happy
+  }
+}
+
+size_t G1CollectedHeap::desired_plab_sz(InCSetState dest) {
+  size_t gclab_word_size = alloc_buffer_stats(dest)->desired_plab_sz();
+  // Prevent humongous PLAB sizes for two reasons:
+  // * PLABs are allocated using a similar paths as oops, but should
+  //   never be in a humongous region
+  // * Allowing humongous PLABs needlessly churns the region free lists
+  return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
+}
+
+HeapWord* G1CollectedHeap::par_allocate_during_gc(InCSetState dest,
+                                                  size_t word_size,
+                                                  AllocationContext_t context) {
+  switch (dest.value()) {
+    case InCSetState::Young:
+      return survivor_attempt_allocation(word_size, context);
+    case InCSetState::Old:
+      return old_attempt_allocation(word_size, context);
+    default:
+      ShouldNotReachHere();
+      return NULL; // Keep some compilers happy
+  }
+}
+
 // Inline functions for G1CollectedHeap

 inline AllocationContextStats& G1CollectedHeap::allocation_context_stats() {
@@ -203,7 +238,7 @@
   return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj);
 }

-G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) {
+InCSetState G1CollectedHeap::in_cset_state(const oop obj) {
   return _in_cset_fast_test.at((HeapWord*)obj);
 }
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -1084,7 +1084,7 @@
   if (update_stats) {
     double cost_per_card_ms = 0.0;
     if (_pending_cards > 0) {
-      cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards;
+      cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards;
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }

@@ -1092,7 +1092,7 @@

     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
-      cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned;
+      cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned;
       if (_last_gc_was_young) {
         _cost_per_entry_ms_seq->add(cost_per_entry_ms);
       } else {
@@ -1134,7 +1134,7 @@
     double cost_per_byte_ms = 0.0;

     if (copied_bytes > 0) {
-      cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes;
+      cost_per_byte_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) / (double) copied_bytes;
       if (_in_marking_window) {
         _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
       } else {
@@ -1143,8 +1143,8 @@
     }

     double all_other_time_ms = pause_time_ms -
-      (phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time()
-      + phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time());
+      (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) + phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) +
+          phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) + phase_times()->average_time_ms(G1GCPhaseTimes::Termination));

     double young_other_time_ms = 0.0;
     if (young_cset_region_length() > 0) {
@@ -1185,8 +1185,8 @@

   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
-  adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(),
-                               phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms);
+  adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS),
+                               phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms);

   _collectionSetChooser->verify();
 }
@@ -1437,18 +1437,6 @@
   return young_list_length < young_list_max_length;
 }

-uint G1CollectorPolicy::max_regions(int purpose) {
-  switch (purpose) {
-    case GCAllocForSurvived:
-      return _max_survivor_regions;
-    case GCAllocForTenured:
-      return REGIONS_UNLIMITED;
-    default:
-      ShouldNotReachHere();
-      return REGIONS_UNLIMITED;
-  };
-}
-
 void G1CollectorPolicy::update_max_gc_locker_expansion() {
   uint expansion_region_num = 0;
   if (GCLockerEdenExpansionPercent > 0) {
@@ -1683,7 +1671,7 @@
   hr->set_next_in_collection_set(_collection_set);
   _collection_set = hr;
   _collection_set_bytes_used_before += hr->used();
-  _g1->register_region_with_in_cset_fast_test(hr);
+  _g1->register_old_region_with_in_cset_fast_test(hr);
   size_t rs_length = hr->rem_set()->occupied();
   _recorded_rs_lengths += rs_length;
   _old_cset_region_length += 1;
@@ -1816,7 +1804,7 @@
   hr->set_in_collection_set(true);
   assert( hr->next_in_collection_set() == NULL, "invariant");

-  _g1->register_region_with_in_cset_fast_test(hr);
+  _g1->register_young_region_with_in_cset_fast_test(hr);
 }

 // Add the region at the RHS of the incremental cset
@@ -2189,19 +2177,19 @@
     _other.add(pause_time_ms - phase_times->accounted_time_ms());
     _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms());
     _parallel.add(phase_times->cur_collection_par_time_ms());
-    _ext_root_scan.add(phase_times->average_last_ext_root_scan_time());
-    _satb_filtering.add(phase_times->average_last_satb_filtering_times_ms());
-    _update_rs.add(phase_times->average_last_update_rs_time());
-    _scan_rs.add(phase_times->average_last_scan_rs_time());
-    _obj_copy.add(phase_times->average_last_obj_copy_time());
-    _termination.add(phase_times->average_last_termination_time());
+    _ext_root_scan.add(phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan));
+    _satb_filtering.add(phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering));
+    _update_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS));
+    _scan_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::ScanRS));
+    _obj_copy.add(phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy));
+    _termination.add(phase_times->average_time_ms(G1GCPhaseTimes::Termination));

-    double parallel_known_time = phase_times->average_last_ext_root_scan_time() +
-      phase_times->average_last_satb_filtering_times_ms() +
-      phase_times->average_last_update_rs_time() +
-      phase_times->average_last_scan_rs_time() +
-      phase_times->average_last_obj_copy_time() +
-      + phase_times->average_last_termination_time();
+    double parallel_known_time = phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan) +
+      phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering) +
+      phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS) +
+      phase_times->average_time_ms(G1GCPhaseTimes::ScanRS) +
+      phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy) +
+      phase_times->average_time_ms(G1GCPhaseTimes::Termination);

     double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time;
     _parallel_other.add(parallel_other_time);
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -877,28 +877,20 @@
 public:
   uint tenuring_threshold() const { return _tenuring_threshold; }

-  inline GCAllocPurpose
-    evacuation_destination(HeapRegion* src_region, uint age, size_t word_sz) {
-      if (age < _tenuring_threshold && src_region->is_young()) {
-        return GCAllocForSurvived;
-      } else {
-        return GCAllocForTenured;
-      }
-  }
-
-  inline bool track_object_age(GCAllocPurpose purpose) {
-    return purpose == GCAllocForSurvived;
-  }
-
   static const uint REGIONS_UNLIMITED = (uint) -1;

-  uint max_regions(int purpose);
-
-  // The limit on regions for a particular purpose is reached.
-  void note_alloc_region_limit_reached(int purpose) {
-    if (purpose == GCAllocForSurvived) {
-      _tenuring_threshold = 0;
+  uint max_regions(InCSetState dest) {
+    switch (dest.value()) {
+      case InCSetState::Young:
+        return _max_survivor_regions;
+      case InCSetState::Old:
+        return REGIONS_UNLIMITED;
+      default:
+        assert(false, err_msg("Unknown dest state: " CSETSTATE_FORMAT, dest.value()));
+        break;
     }
+    // keep some compilers happy
+    return 0;
   }

   void note_start_adding_survivor_regions() {
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -22,12 +22,13 @@
  *
  */

-
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/os.hpp"

 // Helper class for avoiding interleaved logging
 class LineBuffer: public StackObj {
@@ -70,184 +71,258 @@
     va_end(ap);
   }

+  void print_cr() {
+    gclog_or_tty->print_cr("%s", _buffer);
+    _cur = _indent_level * INDENT_CHARS;
+  }
+
   void append_and_print_cr(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
     va_list ap;
     va_start(ap, format);
     vappend(format, ap);
     va_end(ap);
-    gclog_or_tty->print_cr("%s", _buffer);
-    _cur = _indent_level * INDENT_CHARS;
+    print_cr();
   }
 };

-PRAGMA_DIAG_PUSH
-PRAGMA_FORMAT_NONLITERAL_IGNORED
 template <class T>
-void WorkerDataArray<T>::print(int level, const char* title) {
-  if (_length == 1) {
-    // No need for min, max, average and sum for only one worker
-    LineBuffer buf(level);
-    buf.append("[%s:  ", title);
-    buf.append(_print_format, _data[0]);
-    buf.append_and_print_cr("]");
-    return;
+class WorkerDataArray  : public CHeapObj<mtGC> {
+  friend class G1GCParPhasePrinter;
+  T*          _data;
+  uint        _length;
+  const char* _title;
+  bool        _print_sum;
+  int         _log_level;
+  uint        _indent_level;
+  bool        _enabled;
+
+  WorkerDataArray<size_t>* _thread_work_items;
+
+  NOT_PRODUCT(T uninitialized();)
+
+  // We are caching the sum and average to only have to calculate them once.
+  // This is not done in an MT-safe way. It is intended to allow single
+  // threaded code to call sum() and average() multiple times in any order
+  // without having to worry about the cost.
+  bool   _has_new_data;
+  T      _sum;
+  T      _min;
+  T      _max;
+  double _average;
+
+ public:
+  WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) :
+    _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level),
+    _has_new_data(true), _thread_work_items(NULL), _enabled(true) {
+    assert(length > 0, "Must have some workers to store data for");
+    _length = length;
+    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
+  }
+
+  ~WorkerDataArray() {
+    FREE_C_HEAP_ARRAY(T, _data, mtGC);
+  }
+
+  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
+    _thread_work_items = thread_work_items;
+  }
+
+  WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; }
+
+  void set(uint worker_i, T value) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title));
+    _data[worker_i] = value;
+    _has_new_data = true;
+  }
+
+  void set_thread_work_item(uint worker_i, size_t value) {
+    assert(_thread_work_items != NULL, "No sub count");
+    _thread_work_items->set(worker_i, value);
   }

-  T min = _data[0];
-  T max = _data[0];
-  T sum = 0;
+  T get(uint worker_i) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i));
+    return _data[worker_i];
+  }
+
+  void add(uint worker_i, T value) {
+    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
+    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i));
+    _data[worker_i] += value;
+    _has_new_data = true;
+  }

-  LineBuffer buf(level);
-  buf.append("[%s:", title);
-  for (uint i = 0; i < _length; ++i) {
-    T val = _data[i];
-    min = MIN2(val, min);
-    max = MAX2(val, max);
-    sum += val;
-    if (G1Log::finest()) {
-      buf.append("  ");
-      buf.append(_print_format, val);
-    }
+  double average(){
+    calculate_totals();
+    return _average;
+  }
+
+  T sum() {
+    calculate_totals();
+    return _sum;
+  }
+
+  T minimum() {
+    calculate_totals();
+    return _min;
   }

-  if (G1Log::finest()) {
-    buf.append_and_print_cr("%s", "");
+  T maximum() {
+    calculate_totals();
+    return _max;
   }

-  double avg = (double)sum / (double)_length;
-  buf.append(" Min: ");
-  buf.append(_print_format, min);
-  buf.append(", Avg: ");
-  buf.append("%.1lf", avg); // Always print average as a double
-  buf.append(", Max: ");
-  buf.append(_print_format, max);
-  buf.append(", Diff: ");
-  buf.append(_print_format, max - min);
-  if (_print_sum) {
-    // for things like the start and end times the sum is not
-    // that relevant
-    buf.append(", Sum: ");
-    buf.append(_print_format, sum);
+  void reset() PRODUCT_RETURN;
+  void verify() PRODUCT_RETURN;
+
+  void set_enabled(bool enabled) { _enabled = enabled; }
+
+  int log_level() { return _log_level;  }
+
+ private:
+
+  void calculate_totals(){
+    if (!_has_new_data) {
+      return;
+    }
+
+    _sum = (T)0;
+    _min = _data[0];
+    _max = _min;
+    for (uint i = 0; i < _length; ++i) {
+      T val = _data[i];
+      _sum += val;
+      _min = MIN2(_min, val);
+      _max = MAX2(_max, val);
+    }
+    _average = (double)_sum / (double)_length;
+    _has_new_data = false;
   }
-  buf.append_and_print_cr("]");
-}
-PRAGMA_DIAG_POP
+};
+

 #ifndef PRODUCT

-template <> const int WorkerDataArray<int>::_uninitialized = -1;
-template <> const double WorkerDataArray<double>::_uninitialized = -1.0;
-template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1;
+template <>
+size_t WorkerDataArray<size_t>::uninitialized() {
+  return (size_t)-1;
+}
+
+template <>
+double WorkerDataArray<double>::uninitialized() {
+  return -1.0;
+}

 template <class T>
 void WorkerDataArray<T>::reset() {
   for (uint i = 0; i < _length; i++) {
-    _data[i] = (T)_uninitialized;
+    _data[i] = WorkerDataArray<T>::uninitialized();
+  }
+  if (_thread_work_items != NULL) {
+    _thread_work_items->reset();
   }
 }

 template <class T>
 void WorkerDataArray<T>::verify() {
+  if (!_enabled) {
+    return;
+  }
+
   for (uint i = 0; i < _length; i++) {
-    assert(_data[i] != _uninitialized,
-        err_msg("Invalid data for worker " UINT32_FORMAT ", data: %lf, uninitialized: %lf",
-            i, (double)_data[i], (double)_uninitialized));
+    assert(_data[i] != WorkerDataArray<T>::uninitialized(),
+        err_msg("Invalid data for worker %u in '%s'", i, _title));
+  }
+  if (_thread_work_items != NULL) {
+    _thread_work_items->verify();
   }
 }

 #endif

 G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
-  _max_gc_threads(max_gc_threads),
-  _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false),
-  _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"),
-  _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
-  _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
-  _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
-  _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
-  _last_termination_times_ms(_max_gc_threads, "%.1lf"),
-  _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
-  _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
-  _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
-  _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"),
-  _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT),
-  _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
-  _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
+  _max_gc_threads(max_gc_threads)
 {
   assert(max_gc_threads > 0, "Must have some GC threads");
+
+  _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, G1Log::LevelFiner, 2);
+  _gc_par_phases[ExtRootScan] = new WorkerDataArray<double>(max_gc_threads, "Ext Root Scanning (ms)", true, G1Log::LevelFiner, 2);
+
+  // Root scanning phases
+  _gc_par_phases[ThreadRoots] = new WorkerDataArray<double>(max_gc_threads, "Thread Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[StringTableRoots] = new WorkerDataArray<double>(max_gc_threads, "StringTable Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[UniverseRoots] = new WorkerDataArray<double>(max_gc_threads, "Universe Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[JNIRoots] = new WorkerDataArray<double>(max_gc_threads, "JNI Handles Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[ObjectSynchronizerRoots] = new WorkerDataArray<double>(max_gc_threads, "ObjectSynchronizer Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[FlatProfilerRoots] = new WorkerDataArray<double>(max_gc_threads, "FlatProfiler Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[ManagementRoots] = new WorkerDataArray<double>(max_gc_threads, "Management Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[SystemDictionaryRoots] = new WorkerDataArray<double>(max_gc_threads, "SystemDictionary Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[CLDGRoots] = new WorkerDataArray<double>(max_gc_threads, "CLDG Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[JVMTIRoots] = new WorkerDataArray<double>(max_gc_threads, "JVMTI Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[CodeCacheRoots] = new WorkerDataArray<double>(max_gc_threads, "CodeCache Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[CMRefRoots] = new WorkerDataArray<double>(max_gc_threads, "CM RefProcessor Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[WaitForStrongCLD] = new WorkerDataArray<double>(max_gc_threads, "Wait For Strong CLD (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[WeakCLDRoots] = new WorkerDataArray<double>(max_gc_threads, "Weak CLD Roots (ms)", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFinest, 3);
+
+  _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[Termination] = new WorkerDataArray<double>(max_gc_threads, "Termination (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, G1Log::LevelFiner, 2);
+  _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, G1Log::LevelFiner, 2);
+
+  _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers", true, G1Log::LevelFiner, 3);
+  _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);
+
+  _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts);
+
+  _gc_par_phases[StringDedupQueueFixup] = new WorkerDataArray<double>(max_gc_threads, "Queue Fixup (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[StringDedupTableFixup] = new WorkerDataArray<double>(max_gc_threads, "Table Fixup (ms)", true, G1Log::LevelFiner, 2);
+
+  _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>(max_gc_threads, "Parallel Redirty", true, G1Log::LevelFinest, 3);
+  _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards", true, G1Log::LevelFinest, 3);
+  _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards);
 }

-void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
+void G1GCPhaseTimes::note_gc_start(uint active_gc_threads, bool mark_in_progress) {
   assert(active_gc_threads > 0, "The number of threads must be > 0");
-  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads");
+  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads");
   _active_gc_threads = active_gc_threads;

-  _last_gc_worker_start_times_ms.reset();
-  _last_ext_root_scan_times_ms.reset();
-  _last_satb_filtering_times_ms.reset();
-  _last_update_rs_times_ms.reset();
-  _last_update_rs_processed_buffers.reset();
-  _last_scan_rs_times_ms.reset();
-  _last_strong_code_root_scan_times_ms.reset();
-  _last_obj_copy_times_ms.reset();
-  _last_termination_times_ms.reset();
-  _last_termination_attempts.reset();
-  _last_gc_worker_end_times_ms.reset();
-  _last_gc_worker_times_ms.reset();
-  _last_gc_worker_other_times_ms.reset();
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->reset();
+  }

-  _last_redirty_logged_cards_time_ms.reset();
-  _last_redirty_logged_cards_processed_cards.reset();
-
+  _gc_par_phases[StringDedupQueueFixup]->set_enabled(G1StringDedup::is_enabled());
+  _gc_par_phases[StringDedupTableFixup]->set_enabled(G1StringDedup::is_enabled());
 }

 void G1GCPhaseTimes::note_gc_end() {
-  _last_gc_worker_start_times_ms.verify();
-  _last_ext_root_scan_times_ms.verify();
-  _last_satb_filtering_times_ms.verify();
-  _last_update_rs_times_ms.verify();
-  _last_update_rs_processed_buffers.verify();
-  _last_scan_rs_times_ms.verify();
-  _last_strong_code_root_scan_times_ms.verify();
-  _last_obj_copy_times_ms.verify();
-  _last_termination_times_ms.verify();
-  _last_termination_attempts.verify();
-  _last_gc_worker_end_times_ms.verify();
+  for (uint i = 0; i < _active_gc_threads; i++) {
+    double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i);
+    record_time_secs(GCWorkerTotal, i , worker_time);

-  for (uint i = 0; i < _active_gc_threads; i++) {
-    double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
-    _last_gc_worker_times_ms.set(i, worker_time);
+    double worker_known_time =
+        _gc_par_phases[ExtRootScan]->get(i) +
+        _gc_par_phases[SATBFiltering]->get(i) +
+        _gc_par_phases[UpdateRS]->get(i) +
+        _gc_par_phases[ScanRS]->get(i) +
+        _gc_par_phases[CodeRoots]->get(i) +
+        _gc_par_phases[ObjCopy]->get(i) +
+        _gc_par_phases[Termination]->get(i);

-    double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
-                               _last_satb_filtering_times_ms.get(i) +
-                               _last_update_rs_times_ms.get(i) +
-                               _last_scan_rs_times_ms.get(i) +
-                               _last_strong_code_root_scan_times_ms.get(i) +
-                               _last_obj_copy_times_ms.get(i) +
-                               _last_termination_times_ms.get(i);
-
-    double worker_other_time = worker_time - worker_known_time;
-    _last_gc_worker_other_times_ms.set(i, worker_other_time);
+    record_time_secs(Other, i, worker_time - worker_known_time);
   }

-  _last_gc_worker_times_ms.verify();
-  _last_gc_worker_other_times_ms.verify();
-
-  _last_redirty_logged_cards_time_ms.verify();
-  _last_redirty_logged_cards_processed_cards.verify();
-}
-
-void G1GCPhaseTimes::note_string_dedup_fixup_start() {
-  _cur_string_dedup_queue_fixup_worker_times_ms.reset();
-  _cur_string_dedup_table_fixup_worker_times_ms.reset();
-}
-
-void G1GCPhaseTimes::note_string_dedup_fixup_end() {
-  _cur_string_dedup_queue_fixup_worker_times_ms.verify();
-  _cur_string_dedup_table_fixup_worker_times_ms.verify();
+  for (int i = 0; i < GCParPhasesSentinel; i++) {
+    _gc_par_phases[i]->verify();
+  }
 }

 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
@@ -259,7 +334,7 @@
 }

 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
-  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers);
+  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %u]", str, value, workers);
 }

 double G1GCPhaseTimes::accounted_time_ms() {
@@ -287,46 +362,172 @@
     return misc_time_ms;
 }

+// record the time a phase took in seconds
+void G1GCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->set(worker_i, secs);
+}
+
+// add a number of seconds to a phase
+void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) {
+  _gc_par_phases[phase]->add(worker_i, secs);
+}
+
+void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
+  _gc_par_phases[phase]->set_thread_work_item(worker_i, count);
+}
+
+// return the average time for a phase in milliseconds
+double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->average() * 1000.0;
+}
+
+double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) {
+  return _gc_par_phases[phase]->get(worker_i) * 1000.0;
+}
+
+double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->sum() * 1000.0;
+}
+
+double G1GCPhaseTimes::min_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->minimum() * 1000.0;
+}
+
+double G1GCPhaseTimes::max_time_ms(GCParPhases phase) {
+  return _gc_par_phases[phase]->maximum() * 1000.0;
+}
+
+size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->get(worker_i);
+}
+
+size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->sum();
+}
+
+double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->average();
+}
+
+size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->minimum();
+}
+
+size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) {
+  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items()->maximum();
+}
+
+class G1GCParPhasePrinter : public StackObj {
+  G1GCPhaseTimes* _phase_times;
+ public:
+  G1GCParPhasePrinter(G1GCPhaseTimes* phase_times) : _phase_times(phase_times) {}
+
+  void print(G1GCPhaseTimes::GCParPhases phase_id) {
+    WorkerDataArray<double>* phase = _phase_times->_gc_par_phases[phase_id];
+
+    if (phase->_log_level > G1Log::level() || !phase->_enabled) {
+      return;
+    }
+
+    if (phase->_length == 1) {
+      print_single_length(phase_id, phase);
+    } else {
+      print_multi_length(phase_id, phase);
+    }
+  }
+
+ private:
+
+  void print_single_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    // No need for min, max, average and sum for only one worker
+    LineBuffer buf(phase->_indent_level);
+    buf.append_and_print_cr("[%s:  %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0));
+
+    if (phase->_thread_work_items != NULL) {
+      LineBuffer buf2(phase->_thread_work_items->_indent_level);
+      buf2.append_and_print_cr("[%s:  "SIZE_FORMAT"]", phase->_thread_work_items->_title, _phase_times->sum_thread_work_items(phase_id));
+    }
+  }
+
+  void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    for (uint i = 0; i < phase->_length; ++i) {
+      buf.append("  %.1lf", _phase_times->get_time_ms(phase_id, i));
+    }
+    buf.print_cr();
+  }
+
+  void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
+    for (uint i = 0; i < thread_work_items->_length; ++i) {
+      buf.append("  " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i));
+    }
+    buf.print_cr();
+  }
+
+  void print_thread_work_items(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
+    LineBuffer buf(thread_work_items->_indent_level);
+    buf.append("[%s:", thread_work_items->_title);
+
+    if (G1Log::finest()) {
+      print_count_values(buf, phase_id, thread_work_items);
+    }
+
+    assert(thread_work_items->_print_sum, err_msg("%s does not have print sum true even though it is a count", thread_work_items->_title));
+
+    buf.append_and_print_cr(" Min: " SIZE_FORMAT ", Avg: %.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT "]",
+        _phase_times->min_thread_work_items(phase_id), _phase_times->average_thread_work_items(phase_id), _phase_times->max_thread_work_items(phase_id),
+        _phase_times->max_thread_work_items(phase_id) - _phase_times->min_thread_work_items(phase_id), _phase_times->sum_thread_work_items(phase_id));
+  }
+
+  void print_multi_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
+    LineBuffer buf(phase->_indent_level);
+    buf.append("[%s:", phase->_title);
+
+    if (G1Log::finest()) {
+      print_time_values(buf, phase_id, phase);
+    }
+
+    buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf",
+        _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id),
+        _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id));
+
+    if (phase->_print_sum) {
+      // for things like the start and end times the sum is not
+      // that relevant
+      buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id));
+    }
+
+    buf.append_and_print_cr("]");
+
+    if (phase->_thread_work_items != NULL) {
+      print_thread_work_items(phase_id, phase->_thread_work_items);
+    }
+  }
+};
+
 void G1GCPhaseTimes::print(double pause_time_sec) {
+  G1GCParPhasePrinter par_phase_printer(this);
+
   if (_root_region_scan_wait_time_ms > 0.0) {
     print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
   }
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
-    _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
-    _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
-    }
-    _last_update_rs_times_ms.print(2, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(3, "Processed Buffers");
-    _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
-    _last_termination_times_ms.print(2, "Termination (ms)");
-    if (G1Log::finest()) {
-      _last_termination_attempts.print(3, "Termination Attempts");
-    }
-    _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
-    _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
-    _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
-  } else {
-    _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
-    }
-    _last_update_rs_times_ms.print(1, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(2, "Processed Buffers");
-    _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
+
+  print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
+  for (int i = 0; i <= GCMainParPhasesLast; i++) {
+    par_phase_printer.print((GCParPhases) i);
   }
+
   print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
   print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
   if (G1StringDedup::is_enabled()) {
     print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
-    _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)");
-    _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)");
+    for (int i = StringDedupPhasesFirst; i <= StringDedupPhasesLast; i++) {
+      par_phase_printer.print((GCParPhases) i);
+    }
   }
   print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
@@ -350,10 +551,7 @@
   print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
   print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
   print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
-  if (G1Log::finest()) {
-    _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty");
-    _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
-  }
+  par_phase_printer.print(RedirtyCards);
   if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
     print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
     if (G1Log::finest()) {
@@ -373,3 +571,17 @@
     print_stats(2, "Verify After", _cur_verify_after_time_ms);
   }
 }
+
+G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) :
+    _phase_times(phase_times), _phase(phase), _worker_id(worker_id) {
+  if (_phase_times != NULL) {
+    _start_time = os::elapsedTime();
+  }
+}
+
+G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() {
+  if (_phase_times != NULL) {
+    _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time);
+  }
+}
+
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -26,106 +26,60 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP

 #include "memory/allocation.hpp"
-#include "gc_interface/gcCause.hpp"
-
-template <class T>
-class WorkerDataArray  : public CHeapObj<mtGC> {
-  T*          _data;
-  uint        _length;
-  const char* _print_format;
-  bool        _print_sum;
-
-  NOT_PRODUCT(static const T _uninitialized;)
-
-  // We are caching the sum and average to only have to calculate them once.
-  // This is not done in an MT-safe way. It is intended to allow single
-  // threaded code to call sum() and average() multiple times in any order
-  // without having to worry about the cost.
-  bool   _has_new_data;
-  T      _sum;
-  double _average;
-
- public:
-  WorkerDataArray(uint length, const char* print_format, bool print_sum = true) :
-  _length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) {
-    assert(length > 0, "Must have some workers to store data for");
-    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
-  }
-
-  ~WorkerDataArray() {
-    FREE_C_HEAP_ARRAY(T, _data, mtGC);
-  }
-
-  void set(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i));
-    _data[worker_i] = value;
-    _has_new_data = true;
-  }

-  T get(uint worker_i) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
-    return _data[worker_i];
-  }
-
-  void add(uint worker_i, T value) {
-    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
-    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
-    _data[worker_i] += value;
-    _has_new_data = true;
-  }
-
-  double average(){
-    if (_has_new_data) {
-      calculate_totals();
-    }
-    return _average;
-  }
+class LineBuffer;

-  T sum() {
-    if (_has_new_data) {
-      calculate_totals();
-    }
-    return _sum;
-  }
-
-  void print(int level, const char* title);
-
-  void reset() PRODUCT_RETURN;
-  void verify() PRODUCT_RETURN;
-
- private:
-
-  void calculate_totals(){
-    _sum = (T)0;
-    for (uint i = 0; i < _length; ++i) {
-      _sum += _data[i];
-    }
-    _average = (double)_sum / (double)_length;
-    _has_new_data = false;
-  }
-};
+template <class T> class WorkerDataArray;

 class G1GCPhaseTimes : public CHeapObj<mtGC> {
+  friend class G1GCParPhasePrinter;

- private:
   uint _active_gc_threads;
   uint _max_gc_threads;

-  WorkerDataArray<double> _last_gc_worker_start_times_ms;
-  WorkerDataArray<double> _last_ext_root_scan_times_ms;
-  WorkerDataArray<double> _last_satb_filtering_times_ms;
-  WorkerDataArray<double> _last_update_rs_times_ms;
-  WorkerDataArray<int>    _last_update_rs_processed_buffers;
-  WorkerDataArray<double> _last_scan_rs_times_ms;
-  WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
-  WorkerDataArray<double> _last_obj_copy_times_ms;
-  WorkerDataArray<double> _last_termination_times_ms;
-  WorkerDataArray<size_t> _last_termination_attempts;
-  WorkerDataArray<double> _last_gc_worker_end_times_ms;
-  WorkerDataArray<double> _last_gc_worker_times_ms;
-  WorkerDataArray<double> _last_gc_worker_other_times_ms;
+ public:
+  enum GCParPhases {
+    GCWorkerStart,
+    ExtRootScan,
+    ThreadRoots,
+    StringTableRoots,
+    UniverseRoots,
+    JNIRoots,
+    ObjectSynchronizerRoots,
+    FlatProfilerRoots,
+    ManagementRoots,
+    SystemDictionaryRoots,
+    CLDGRoots,
+    JVMTIRoots,
+    CodeCacheRoots,
+    CMRefRoots,
+    WaitForStrongCLD,
+    WeakCLDRoots,
+    SATBFiltering,
+    UpdateRS,
+    ScanRS,
+    CodeRoots,
+    ObjCopy,
+    Termination,
+    Other,
+    GCWorkerTotal,
+    GCWorkerEnd,
+    StringDedupQueueFixup,
+    StringDedupTableFixup,
+    RedirtyCards,
+    GCParPhasesSentinel
+  };
+
+ private:
+  // Markers for grouping the phases in the GCPhases enum above
+  static const int GCMainParPhasesLast = GCWorkerEnd;
+  static const int StringDedupPhasesFirst = StringDedupQueueFixup;
+  static const int StringDedupPhasesLast = StringDedupTableFixup;
+
+  WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
+  WorkerDataArray<size_t>* _update_rs_processed_buffers;
+  WorkerDataArray<size_t>* _termination_attempts;
+  WorkerDataArray<size_t>* _redirtied_cards;

   double _cur_collection_par_time_ms;
   double _cur_collection_code_root_fixup_time_ms;
@@ -135,9 +89,7 @@
   double _cur_evac_fail_restore_remsets;
   double _cur_evac_fail_remove_self_forwards;

-  double                  _cur_string_dedup_fixup_time_ms;
-  WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms;
-  WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms;
+  double _cur_string_dedup_fixup_time_ms;

   double _cur_clear_ct_time_ms;
   double _cur_ref_proc_time_ms;
@@ -149,8 +101,6 @@
   double _recorded_young_cset_choice_time_ms;
   double _recorded_non_young_cset_choice_time_ms;

-  WorkerDataArray<double> _last_redirty_logged_cards_time_ms;
-  WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards;
   double _recorded_redirty_logged_cards_time_ms;

   double _recorded_young_free_cset_time_ms;
@@ -171,54 +121,34 @@

  public:
   G1GCPhaseTimes(uint max_gc_threads);
-  void note_gc_start(uint active_gc_threads);
+  void note_gc_start(uint active_gc_threads, bool mark_in_progress);
   void note_gc_end();
   void print(double pause_time_sec);

-  void record_gc_worker_start_time(uint worker_i, double ms) {
-    _last_gc_worker_start_times_ms.set(worker_i, ms);
-  }
-
-  void record_ext_root_scan_time(uint worker_i, double ms) {
-    _last_ext_root_scan_times_ms.set(worker_i, ms);
-  }
+  // record the time a phase took in seconds
+  void record_time_secs(GCParPhases phase, uint worker_i, double secs);

-  void record_satb_filtering_time(uint worker_i, double ms) {
-    _last_satb_filtering_times_ms.set(worker_i, ms);
-  }
+  // add a number of seconds to a phase
+  void add_time_secs(GCParPhases phase, uint worker_i, double secs);

-  void record_update_rs_time(uint worker_i, double ms) {
-    _last_update_rs_times_ms.set(worker_i, ms);
-  }
+  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);

-  void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) {
-    _last_update_rs_processed_buffers.set(worker_i, processed_buffers);
-  }
+  // return the average time for a phase in milliseconds
+  double average_time_ms(GCParPhases phase);

-  void record_scan_rs_time(uint worker_i, double ms) {
-    _last_scan_rs_times_ms.set(worker_i, ms);
-  }
-
-  void record_strong_code_root_scan_time(uint worker_i, double ms) {
-    _last_strong_code_root_scan_times_ms.set(worker_i, ms);
-  }
-
-  void record_obj_copy_time(uint worker_i, double ms) {
-    _last_obj_copy_times_ms.set(worker_i, ms);
-  }
+  size_t sum_thread_work_items(GCParPhases phase);

-  void add_obj_copy_time(uint worker_i, double ms) {
-    _last_obj_copy_times_ms.add(worker_i, ms);
-  }
+ private:
+  double get_time_ms(GCParPhases phase, uint worker_i);
+  double sum_time_ms(GCParPhases phase);
+  double min_time_ms(GCParPhases phase);
+  double max_time_ms(GCParPhases phase);
+  size_t get_thread_work_item(GCParPhases phase, uint worker_i);
+  double average_thread_work_items(GCParPhases phase);
+  size_t min_thread_work_items(GCParPhases phase);
+  size_t max_thread_work_items(GCParPhases phase);

-  void record_termination(uint worker_i, double ms, size_t attempts) {
-    _last_termination_times_ms.set(worker_i, ms);
-    _last_termination_attempts.set(worker_i, attempts);
-  }
-
-  void record_gc_worker_end_time(uint worker_i, double ms) {
-    _last_gc_worker_end_times_ms.set(worker_i, ms);
-  }
+ public:

   void record_clear_ct_time(double ms) {
     _cur_clear_ct_time_ms = ms;
@@ -248,21 +178,10 @@
     _cur_evac_fail_remove_self_forwards = ms;
   }

-  void note_string_dedup_fixup_start();
-  void note_string_dedup_fixup_end();
-
   void record_string_dedup_fixup_time(double ms) {
     _cur_string_dedup_fixup_time_ms = ms;
   }

-  void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) {
-    _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms);
-  }
-
-  void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) {
-    _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms);
-  }
-
   void record_ref_proc_time(double ms) {
     _cur_ref_proc_time_ms = ms;
   }
@@ -301,14 +220,6 @@
     _recorded_non_young_cset_choice_time_ms = time_ms;
   }

-  void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) {
-    _last_redirty_logged_cards_time_ms.set(worker_i, time_ms);
-  }
-
-  void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) {
-    _last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers);
-  }
-
   void record_redirty_logged_cards_time_ms(double time_ms) {
     _recorded_redirty_logged_cards_time_ms = time_ms;
   }
@@ -362,38 +273,16 @@
   double fast_reclaim_humongous_time_ms() {
     return _cur_fast_reclaim_humongous_time_ms;
   }
-
-  double average_last_update_rs_time() {
-    return _last_update_rs_times_ms.average();
-  }
-
-  int sum_last_update_rs_processed_buffers() {
-    return _last_update_rs_processed_buffers.sum();
-  }
-
-  double average_last_scan_rs_time(){
-    return _last_scan_rs_times_ms.average();
-  }
+};

-  double average_last_strong_code_root_scan_time(){
-    return _last_strong_code_root_scan_times_ms.average();
-  }
-
-  double average_last_obj_copy_time() {
-    return _last_obj_copy_times_ms.average();
-  }
-
-  double average_last_termination_time() {
-    return _last_termination_times_ms.average();
-  }
-
-  double average_last_ext_root_scan_time() {
-    return _last_ext_root_scan_times_ms.average();
-  }
-
-  double average_last_satb_filtering_times_ms() {
-    return _last_satb_filtering_times_ms.average();
-  }
+class G1GCParPhaseTimesTracker : public StackObj {
+  double _start_time;
+  G1GCPhaseTimes::GCParPhases _phase;
+  G1GCPhaseTimes* _phase_times;
+  uint _worker_id;
+public:
+  G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id);
+  ~G1GCParPhaseTimesTracker();
 };

 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
--- a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,16 +36,13 @@
   if (default_use_cache()) {
     _use_cache = true;

-    _hot_cache_size = (1 << G1ConcRSLogCacheSize);
+    _hot_cache_size = (size_t)1 << G1ConcRSLogCacheSize;
     _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size, mtGC);

-    _n_hot = 0;
-    _hot_cache_idx = 0;
+    reset_hot_cache_internal();

     // For refining the cards in the hot cache in parallel
-    uint n_workers = (ParallelGCThreads > 0 ?
-                        _g1h->workers()->total_workers() : 1);
-    _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / (int)n_workers);
+    _hot_cache_par_chunk_size = (int)(ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size);
     _hot_cache_par_claimed_idx = 0;

     _card_counts.initialize(card_counts_storage);
@@ -66,26 +63,21 @@
     // return it for immediate refining.
     return card_ptr;
   }
-
   // Otherwise, the card is hot.
-  jbyte* res = NULL;
-  MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
-  if (_n_hot == _hot_cache_size) {
-    res = _hot_cache[_hot_cache_idx];
-    _n_hot--;
-  }
+  size_t index = Atomic::add_ptr((intptr_t)1, (volatile intptr_t*)&_hot_cache_idx) - 1;
+  size_t masked_index = index & (_hot_cache_size - 1);
+  jbyte* current_ptr = _hot_cache[masked_index];

-  // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
-  _hot_cache[_hot_cache_idx] = card_ptr;
-  _hot_cache_idx++;
-
-  if (_hot_cache_idx == _hot_cache_size) {
-    // Wrap around
-    _hot_cache_idx = 0;
-  }
-  _n_hot++;
-
-  return res;
+  // Try to store the new card pointer into the cache. Compare-and-swap to guard
+  // against the unlikely event of a race resulting in another card pointer to
+  // have already been written to the cache. In this case we will return
+  // card_ptr in favor of the other option, which would be starting over. This
+  // should be OK since card_ptr will likely be the older card already when/if
+  // this ever happens.
+  jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr,
+                                                    &_hot_cache[masked_index],
+                                                    current_ptr);
+  return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
 }

 void G1HotCardCache::drain(uint worker_i,
@@ -98,38 +90,37 @@

   assert(_hot_cache != NULL, "Logic");
   assert(!use_cache(), "cache should be disabled");
-  int start_idx;
-
-  while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once
-    int end_idx = start_idx + _hot_cache_par_chunk_size;
+  while (_hot_cache_par_claimed_idx < _hot_cache_size) {
+    size_t end_idx = Atomic::add_ptr((intptr_t)_hot_cache_par_chunk_size,
+                                     (volatile intptr_t*)&_hot_cache_par_claimed_idx);
+    size_t start_idx = end_idx - _hot_cache_par_chunk_size;
+    // The current worker has successfully claimed the chunk [start_idx..end_idx)
+    end_idx = MIN2(end_idx, _hot_cache_size);
+    for (size_t i = start_idx; i < end_idx; i++) {
+      jbyte* card_ptr = _hot_cache[i];
+      if (card_ptr != NULL) {
+        if (g1rs->refine_card(card_ptr, worker_i, true)) {
+          // The part of the heap spanned by the card contains references
+          // that point into the current collection set.
+          // We need to record the card pointer in the DirtyCardQueueSet
+          // that we use for such cards.
+          //
+          // The only time we care about recording cards that contain
+          // references that point into the collection set is during
+          // RSet updating while within an evacuation pause.
+          // In this case worker_i should be the id of a GC worker thread
+          assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
+          assert(worker_i < ParallelGCThreads,
+                 err_msg("incorrect worker id: %u", worker_i));

-    if (start_idx ==
-        Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) {
-      // The current worker has successfully claimed the chunk [start_idx..end_idx)
-      end_idx = MIN2(end_idx, _n_hot);
-      for (int i = start_idx; i < end_idx; i++) {
-        jbyte* card_ptr = _hot_cache[i];
-        if (card_ptr != NULL) {
-          if (g1rs->refine_card(card_ptr, worker_i, true)) {
-            // The part of the heap spanned by the card contains references
-            // that point into the current collection set.
-            // We need to record the card pointer in the DirtyCardQueueSet
-            // that we use for such cards.
-            //
-            // The only time we care about recording cards that contain
-            // references that point into the collection set is during
-            // RSet updating while within an evacuation pause.
-            // In this case worker_i should be the id of a GC worker thread
-            assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
-            assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads),
-                   err_msg("incorrect worker id: "UINT32_FORMAT, worker_i));
-
-            into_cset_dcq->enqueue(card_ptr);
-          }
+          into_cset_dcq->enqueue(card_ptr);
         }
+      } else {
+        break;
       }
     }
   }
+
   // The existing entries in the hot card cache, which were just refined
   // above, are discarded prior to re-enabling the cache near the end of the GC.
 }
--- a/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,21 +54,33 @@
 // code, increasing throughput.

 class G1HotCardCache: public CHeapObj<mtGC> {
-  G1CollectedHeap*   _g1h;
+
+  G1CollectedHeap*  _g1h;
+
+  bool              _use_cache;
+
+  G1CardCounts      _card_counts;

   // The card cache table
-  jbyte**      _hot_cache;
+  jbyte**           _hot_cache;

-  int          _hot_cache_size;
-  int          _n_hot;
-  int          _hot_cache_idx;
+  size_t            _hot_cache_size;
+
+  int               _hot_cache_par_chunk_size;

-  int          _hot_cache_par_chunk_size;
-  volatile int _hot_cache_par_claimed_idx;
+  // Avoids false sharing when concurrently updating _hot_cache_idx or
+  // _hot_cache_par_claimed_idx. These are never updated at the same time
+  // thus it's not necessary to separate them as well
+  char _pad_before[DEFAULT_CACHE_LINE_SIZE];
+
+  volatile size_t _hot_cache_idx;

-  bool         _use_cache;
+  volatile size_t _hot_cache_par_claimed_idx;

-  G1CardCounts _card_counts;
+  char _pad_after[DEFAULT_CACHE_LINE_SIZE];
+
+  // The number of cached cards a thread claims when flushing the cache
+  static const int ClaimChunkSize = 32;

   bool default_use_cache() const {
     return (G1ConcRSLogCacheSize > 0);
@@ -110,16 +122,25 @@
   void reset_hot_cache() {
     assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
     assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
-    _hot_cache_idx = 0; _n_hot = 0;
+    if (default_use_cache()) {
+        reset_hot_cache_internal();
+    }
   }

-  bool hot_cache_is_empty() { return _n_hot == 0; }
-
   // Zeros the values in the card counts table for entire committed heap
   void reset_card_counts();

   // Zeros the values in the card counts table for the given region
   void reset_card_counts(HeapRegion* hr);
+
+ private:
+  void reset_hot_cache_internal() {
+    assert(_hot_cache != NULL, "Logic");
+    _hot_cache_idx = 0;
+    for (size_t i = 0; i < _hot_cache_size; i++) {
+      _hot_cache[i] = NULL;
+    }
+  }
 };

 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1HOTCARDCACHE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1InCSetState.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
+
+#include "gc_implementation/g1/g1BiasedArray.hpp"
+#include "memory/allocation.hpp"
+
+// Per-region state during garbage collection.
+struct InCSetState {
+ public:
+  // We use different types to represent the state value. Particularly SPARC puts
+  // values in structs from "left to right", i.e. MSB to LSB. This results in many
+  // unnecessary shift operations when loading and storing values of this type.
+  // This degrades performance significantly (>10%) on that platform.
+  // Other tested ABIs do not seem to have this problem, and actually tend to
+  // favor smaller types, so we use the smallest usable type there.
+#ifdef SPARC
+  #define CSETSTATE_FORMAT INTPTR_FORMAT
+  typedef intptr_t in_cset_state_t;
+#else
+  #define CSETSTATE_FORMAT "%d"
+  typedef int8_t in_cset_state_t;
+#endif
+ private:
+  in_cset_state_t _value;
+ public:
+  enum {
+    // Selection of the values were driven to micro-optimize the encoding and
+    // frequency of the checks.
+    // The most common check is whether the region is in the collection set or not.
+    // This encoding allows us to use an != 0 check which in some architectures
+    // (x86*) can be encoded slightly more efficently than a normal comparison
+    // against zero.
+    // The same situation occurs when checking whether the region is humongous
+    // or not, which is encoded by values < 0.
+    // The other values are simply encoded in increasing generation order, which
+    // makes getting the next generation fast by a simple increment.
+    Humongous    = -1,    // The region is humongous - note that actually any value < 0 would be possible here.
+    NotInCSet    =  0,    // The region is not in the collection set.
+    Young        =  1,    // The region is in the collection set and a young region.
+    Old          =  2,    // The region is in the collection set and an old region.
+    Num
+  };
+
+  InCSetState(in_cset_state_t value = NotInCSet) : _value(value) {
+    assert(is_valid(), err_msg("Invalid state %d", _value));
+  }
+
+  in_cset_state_t value() const        { return _value; }
+
+  void set_old()                       { _value = Old; }
+
+  bool is_in_cset_or_humongous() const { return _value != NotInCSet; }
+  bool is_in_cset() const              { return _value > NotInCSet; }
+  bool is_humongous() const            { return _value < NotInCSet; }
+  bool is_young() const                { return _value == Young; }
+  bool is_old() const                  { return _value == Old; }
+
+#ifdef ASSERT
+  bool is_default() const              { return !is_in_cset_or_humongous(); }
+  bool is_valid() const                { return (_value >= Humongous) && (_value < Num); }
+  bool is_valid_gen() const            { return (_value >= Young && _value <= Old); }
+#endif
+};
+
+// Instances of this class are used for quick tests on whether a reference points
+// into the collection set and into which generation or is a humongous object
+//
+// Each of the array's elements indicates whether the corresponding region is in
+// the collection set and if so in which generation, or a humongous region.
+//
+// We use this to speed up reference processing during young collection and
+// quickly reclaim humongous objects. For the latter, by making a humongous region
+// succeed this test, we sort-of add it to the collection set. During the reference
+// iteration closures, when we see a humongous region, we then simply mark it as
+// referenced, i.e. live.
+class G1InCSetStateFastTestBiasedMappedArray : public G1BiasedMappedArray<InCSetState> {
+ protected:
+  InCSetState default_value() const { return InCSetState::NotInCSet; }
+ public:
+  void set_humongous(uintptr_t index) {
+    assert(get_by_index(index).is_default(),
+           err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
+    set_by_index(index, InCSetState::Humongous);
+  }
+
+  void clear_humongous(uintptr_t index) {
+    set_by_index(index, InCSetState::NotInCSet);
+  }
+
+  void set_in_young(uintptr_t index) {
+    assert(get_by_index(index).is_default(),
+           err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
+    set_by_index(index, InCSetState::Young);
+  }
+
+  void set_in_old(uintptr_t index) {
+    assert(get_by_index(index).is_default(),
+           err_msg("State at index " INTPTR_FORMAT" should be default but is " CSETSTATE_FORMAT, index, get_by_index(index).value()));
+    set_by_index(index, InCSetState::Old);
+  }
+
+  bool is_in_cset_or_humongous(HeapWord* addr) const { return at(addr).is_in_cset_or_humongous(); }
+  bool is_in_cset(HeapWord* addr) const { return at(addr).is_in_cset(); }
+  InCSetState at(HeapWord* addr) const { return get_by_address(addr); }
+  void clear() { G1BiasedMappedArray<InCSetState>::clear(); }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1INCSETSTATE_HPP
--- a/src/share/vm/gc_implementation/g1/g1Log.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1Log.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -28,6 +28,7 @@
 #include "memory/allocation.hpp"

 class G1Log : public AllStatic {
+ public:
   typedef enum {
     LevelNone,
     LevelFine,
@@ -35,6 +36,7 @@
     LevelFinest
   } LogLevel;

+ private:
   static LogLevel _level;

  public:
@@ -50,6 +52,10 @@
     return _level == LevelFinest;
   }

+  static LogLevel level() {
+    return _level;
+  }
+
   static void init();
 };
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -31,6 +31,7 @@
 #include "code/icBuffer.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
+#include "gc_implementation/g1/g1RootProcessor.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
@@ -126,21 +127,22 @@
   GCTraceTime tm("phase 1", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace(" 1");

-  SharedHeap* sh = SharedHeap::heap();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();

   // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();

   MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
-  sh->process_strong_roots(true,   // activate StrongRootsScope
-                           SharedHeap::SO_None,
-                           &GenMarkSweep::follow_root_closure,
-                           &GenMarkSweep::follow_cld_closure,
-                           &follow_code_closure);
+  {
+    G1RootProcessor root_processor(g1h);
+    root_processor.process_strong_roots(&GenMarkSweep::follow_root_closure,
+                                        &GenMarkSweep::follow_cld_closure,
+                                        &follow_code_closure);
+  }

   // Process reference objects found during marking
   ReferenceProcessor* rp = GenMarkSweep::ref_processor();
-  assert(rp == G1CollectedHeap::heap()->ref_processor_stw(), "Sanity");
+  assert(rp == g1h->ref_processor_stw(), "Sanity");

   rp->setup_policy(clear_all_softrefs);
   const ReferenceProcessorStats& stats =
@@ -226,6 +228,12 @@
   }
 };

+class G1AlwaysTrueClosure: public BoolObjectClosure {
+public:
+  bool do_object_b(oop p) { return true; }
+};
+static G1AlwaysTrueClosure always_true;
+
 void G1MarkSweep::mark_sweep_phase3() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();

@@ -233,24 +241,23 @@
   GCTraceTime tm("phase 3", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace("3");

-  SharedHeap* sh = SharedHeap::heap();
-
   // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();

   CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
-  sh->process_all_roots(true,  // activate StrongRootsScope
-                        SharedHeap::SO_AllCodeCache,
-                        &GenMarkSweep::adjust_pointer_closure,
-                        &GenMarkSweep::adjust_cld_closure,
-                        &adjust_code_closure);
+  {
+    G1RootProcessor root_processor(g1h);
+    root_processor.process_all_roots(&GenMarkSweep::adjust_pointer_closure,
+                                     &GenMarkSweep::adjust_cld_closure,
+                                     &adjust_code_closure);
+  }

   assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity");
   g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure);

   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
-  sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
+  JNIHandles::weak_oops_do(&always_true, &GenMarkSweep::adjust_pointer_closure);

   if (G1StringDedup::is_enabled()) {
     G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP

 #include "memory/iterator.hpp"
+#include "oops/markOop.hpp"

 class HeapRegion;
 class G1CollectedHeap;
@@ -239,14 +240,14 @@
   G1CollectedHeap* _g1;
   G1RemSet* _g1_rem_set;
   HeapRegion* _from;
-  OopsInHeapRegionClosure* _push_ref_cl;
+  G1ParPushHeapRSClosure* _push_ref_cl;
   bool _record_refs_into_cset;
   uint _worker_i;

 public:
   G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
                                 G1RemSet* rs,
-                                OopsInHeapRegionClosure* push_ref_cl,
+                                G1ParPushHeapRSClosure* push_ref_cl,
                                 bool record_refs_into_cset,
                                 uint worker_i = 0);

@@ -256,7 +257,8 @@
   }

   bool self_forwarded(oop obj) {
-    bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
+    markOop m = obj->mark();
+    bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj));
     return result;
   }
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -67,8 +67,8 @@

   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
-    if (state == G1CollectedHeap::InCSet) {
+    const InCSetState state = _g1->in_cset_state(obj);
+    if (state.is_in_cset()) {
       // We're not going to even bother checking whether the object is
       // already forwarded or not, as this usually causes an immediate
       // stall. We'll try to prefetch the object (for write, given that
@@ -87,7 +87,7 @@

       _par_scan_state->push_on_queue(p);
     } else {
-      if (state == G1CollectedHeap::IsHumongous) {
+      if (state.is_humongous()) {
         _g1->set_humongous_is_live(obj);
       }
       _par_scan_state->update_rs(_from, p, _worker_id);
--- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -38,6 +38,7 @@
     _g1_rem(g1h->g1_rem_set()),
     _hash_seed(17), _queue_num(queue_num),
     _term_attempts(0),
+    _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()),
     _age_table(false), _scanner(g1h, rp),
     _strong_roots_time(0), _term_time(0) {
   _scanner.set_par_scan_thread_state(this);
@@ -59,6 +60,12 @@

   _g1_par_allocator = G1ParGCAllocator::create_allocator(_g1h);

+  _dest[InCSetState::NotInCSet]    = InCSetState::NotInCSet;
+  // The dest for Young is used when the objects are aged enough to
+  // need to be moved to the next space.
+  _dest[InCSetState::Young]        = InCSetState::Old;
+  _dest[InCSetState::Old]          = InCSetState::Old;
+
   _start = os::elapsedTime();
 }

@@ -150,86 +157,126 @@
   } while (!_refs->is_empty());
 }

-oop G1ParScanThreadState::copy_to_survivor_space(oop const old) {
-  size_t word_sz = old->size();
-  HeapRegion* from_region = _g1h->heap_region_containing_raw(old);
+HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state,
+                                                      InCSetState* dest,
+                                                      size_t word_sz,
+                                                      AllocationContext_t const context) {
+  assert(state.is_in_cset_or_humongous(), err_msg("Unexpected state: " CSETSTATE_FORMAT, state.value()));
+  assert(dest->is_in_cset_or_humongous(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
+
+  // Right now we only have two types of regions (young / old) so
+  // let's keep the logic here simple. We can generalize it when necessary.
+  if (dest->is_young()) {
+    HeapWord* const obj_ptr = _g1_par_allocator->allocate(InCSetState::Old,
+                                                          word_sz, context);
+    if (obj_ptr == NULL) {
+      return NULL;
+    }
+    // Make sure that we won't attempt to copy any other objects out
+    // of a survivor region (given that apparently we cannot allocate
+    // any new ones) to avoid coming into this slow path.
+    _tenuring_threshold = 0;
+    dest->set_old();
+    return obj_ptr;
+  } else {
+    assert(dest->is_old(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value()));
+    // no other space to try.
+    return NULL;
+  }
+}
+
+InCSetState G1ParScanThreadState::next_state(InCSetState const state, markOop const m, uint& age) {
+  if (state.is_young()) {
+    age = !m->has_displaced_mark_helper() ? m->age()
+                                          : m->displaced_mark_helper()->age();
+    if (age < _tenuring_threshold) {
+      return state;
+    }
+  }
+  return dest(state);
+}
+
+oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state,
+                                                 oop const old,
+                                                 markOop const old_mark) {
+  const size_t word_sz = old->size();
+  HeapRegion* const from_region = _g1h->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
-  int       young_index = from_region->young_index_in_cset()+1;
+  const int young_index = from_region->young_index_in_cset()+1;
   assert( (from_region->is_young() && young_index >  0) ||
          (!from_region->is_young() && young_index == 0), "invariant" );
-  G1CollectorPolicy* g1p = _g1h->g1_policy();
-  markOop m = old->mark();
-  int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
-                                           : m->age();
-  GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
-                                                             word_sz);
-  AllocationContext_t context = from_region->allocation_context();
-  HeapWord* obj_ptr = _g1_par_allocator->allocate(alloc_purpose, word_sz, context);
+  const AllocationContext_t context = from_region->allocation_context();
+
+  uint age = 0;
+  InCSetState dest_state = next_state(state, old_mark, age);
+  HeapWord* obj_ptr = _g1_par_allocator->plab_allocate(dest_state, word_sz, context);
+
+  // PLAB allocations should succeed most of the time, so we'll
+  // normally check against NULL once and that's it.
+  if (obj_ptr == NULL) {
+    obj_ptr = _g1_par_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context);
+    if (obj_ptr == NULL) {
+      obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context);
+      if (obj_ptr == NULL) {
+        // This will either forward-to-self, or detect that someone else has
+        // installed a forwarding pointer.
+        return _g1h->handle_evacuation_failure_par(this, old);
+      }
+    }
+  }
+
+  assert(obj_ptr != NULL, "when we get here, allocation should have succeeded");
 #ifndef PRODUCT
   // Should this evacuation fail?
   if (_g1h->evacuation_should_fail()) {
-    if (obj_ptr != NULL) {
-      _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context);
-      obj_ptr = NULL;
-    }
+    // Doing this after all the allocation attempts also tests the
+    // undo_allocation() method too.
+    _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context);
+    return _g1h->handle_evacuation_failure_par(this, old);
   }
 #endif // !PRODUCT

-  if (obj_ptr == NULL) {
-    // This will either forward-to-self, or detect that someone else has
-    // installed a forwarding pointer.
-    return _g1h->handle_evacuation_failure_par(this, old);
-  }
-
-  oop obj = oop(obj_ptr);
-
   // We're going to allocate linearly, so might as well prefetch ahead.
   Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);

-  oop forward_ptr = old->forward_to_atomic(obj);
+  const oop obj = oop(obj_ptr);
+  const oop forward_ptr = old->forward_to_atomic(obj);
   if (forward_ptr == NULL) {
     Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);

-    // alloc_purpose is just a hint to allocate() above, recheck the type of region
-    // we actually allocated from and update alloc_purpose accordingly
-    HeapRegion* to_region = _g1h->heap_region_containing_raw(obj_ptr);
-    alloc_purpose = to_region->is_young() ? GCAllocForSurvived : GCAllocForTenured;
-
-    if (g1p->track_object_age(alloc_purpose)) {
-      // We could simply do obj->incr_age(). However, this causes a
-      // performance issue. obj->incr_age() will first check whether
-      // the object has a displaced mark by checking its mark word;
-      // getting the mark word from the new location of the object
-      // stalls. So, given that we already have the mark word and we
-      // are about to install it anyway, it's better to increase the
-      // age on the mark word, when the object does not have a
-      // displaced mark word. We're not expecting many objects to have
-      // a displaced marked word, so that case is not optimized
-      // further (it could be...) and we simply call obj->incr_age().
-
-      if (m->has_displaced_mark_helper()) {
-        // in this case, we have to install the mark word first,
+    if (dest_state.is_young()) {
+      if (age < markOopDesc::max_age) {
+        age++;
+      }
+      if (old_mark->has_displaced_mark_helper()) {
+        // In this case, we have to install the mark word first,
         // otherwise obj looks to be forwarded (the old mark word,
         // which contains the forward pointer, was copied)
-        obj->set_mark(m);
-        obj->incr_age();
+        obj->set_mark(old_mark);
+        markOop new_mark = old_mark->displaced_mark_helper()->set_age(age);
+        old_mark->set_displaced_mark_helper(new_mark);
       } else {
-        m = m->incr_age();
-        obj->set_mark(m);
+        obj->set_mark(old_mark->set_age(age));
       }
-      age_table()->add(obj, word_sz);
+      age_table()->add(age, word_sz);
     } else {
-      obj->set_mark(m);
+      obj->set_mark(old_mark);
     }

     if (G1StringDedup::is_enabled()) {
-      G1StringDedup::enqueue_from_evacuation(from_region->is_young(),
-                                             to_region->is_young(),
+      const bool is_from_young = state.is_young();
+      const bool is_to_young = dest_state.is_young();
+      assert(is_from_young == _g1h->heap_region_containing_raw(old)->is_young(),
+             "sanity");
+      assert(is_to_young == _g1h->heap_region_containing_raw(obj)->is_young(),
+             "sanity");
+      G1StringDedup::enqueue_from_evacuation(is_from_young,
+                                             is_to_young,
                                              queue_num(),
                                              obj);
     }

-    size_t* surv_young_words = surviving_young_words();
+    size_t* const surv_young_words = surviving_young_words();
     surv_young_words[young_index] += word_sz;

     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
@@ -240,14 +287,13 @@
       oop* old_p = set_partial_array_mask(old);
       push_on_queue(old_p);
     } else {
-      // No point in using the slower heap_region_containing() method,
-      // given that we know obj is in the heap.
-      _scanner.set_region(_g1h->heap_region_containing_raw(obj));
+      HeapRegion* const to_region = _g1h->heap_region_containing_raw(obj_ptr);
+      _scanner.set_region(to_region);
       obj->oop_iterate_backwards(&_scanner);
     }
+    return obj;
   } else {
-    _g1_par_allocator->undo_allocation(alloc_purpose, obj_ptr, word_sz, context);
-    obj = forward_ptr;
+    _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context);
+    return forward_ptr;
   }
-  return obj;
 }
--- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -46,14 +46,16 @@
   G1SATBCardTableModRefBS* _ct_bs;
   G1RemSet* _g1_rem;

-  G1ParGCAllocator*   _g1_par_allocator;
-
-  ageTable            _age_table;
+  G1ParGCAllocator* _g1_par_allocator;

-  G1ParScanClosure    _scanner;
+  ageTable          _age_table;
+  InCSetState       _dest[InCSetState::Num];
+  // Local tenuring threshold.
+  uint              _tenuring_threshold;
+  G1ParScanClosure  _scanner;

-  size_t           _alloc_buffer_waste;
-  size_t           _undo_waste;
+  size_t            _alloc_buffer_waste;
+  size_t            _undo_waste;

   OopsInHeapRegionClosure*      _evac_failure_cl;

@@ -82,6 +84,14 @@
   DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
   G1SATBCardTableModRefBS* ctbs()                { return _ct_bs; }

+  InCSetState dest(InCSetState original) const {
+    assert(original.is_valid(),
+           err_msg("Original state invalid: " CSETSTATE_FORMAT, original.value()));
+    assert(_dest[original.value()].is_valid_gen(),
+           err_msg("Dest state is invalid: " CSETSTATE_FORMAT, _dest[original.value()].value()));
+    return _dest[original.value()];
+  }
+
  public:
   G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp);
   ~G1ParScanThreadState();
@@ -112,7 +122,6 @@
       }
    }
   }
- public:

   void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
     _evac_failure_cl = evac_failure_cl;
@@ -193,9 +202,20 @@
   template <class T> inline void deal_with_reference(T* ref_to_scan);

   inline void dispatch_reference(StarTask ref);
+
+  // Tries to allocate word_sz in the PLAB of the next "generation" after trying to
+  // allocate into dest. State is the original (source) cset state for the object
+  // that is allocated for.
+  // Returns a non-NULL pointer if successful, and updates dest if required.
+  HeapWord* allocate_in_next_plab(InCSetState const state,
+                                  InCSetState* dest,
+                                  size_t word_sz,
+                                  AllocationContext_t const context);
+
+  inline InCSetState next_state(InCSetState const state, markOop const m, uint& age);
  public:

-  oop copy_to_survivor_space(oop const obj);
+  oop copy_to_survivor_space(InCSetState const state, oop const obj, markOop const old_mark);

   void trim_queue();
--- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -38,20 +38,21 @@
   // set, due to (benign) races in the claim mechanism during RSet scanning more
   // than one thread might claim the same card. So the same card may be
   // processed multiple times. So redo this check.
-  G1CollectedHeap::in_cset_state_t in_cset_state = _g1h->in_cset_state(obj);
-  if (in_cset_state == G1CollectedHeap::InCSet) {
+  const InCSetState in_cset_state = _g1h->in_cset_state(obj);
+  if (in_cset_state.is_in_cset()) {
     oop forwardee;
-    if (obj->is_forwarded()) {
-      forwardee = obj->forwardee();
+    markOop m = obj->mark();
+    if (m->is_marked()) {
+      forwardee = (oop) m->decode_pointer();
     } else {
-      forwardee = copy_to_survivor_space(obj);
+      forwardee = copy_to_survivor_space(in_cset_state, obj, m);
     }
     oopDesc::encode_store_heap_oop(p, forwardee);
-  } else if (in_cset_state == G1CollectedHeap::IsHumongous) {
+  } else if (in_cset_state.is_humongous()) {
     _g1h->set_humongous_is_live(obj);
   } else {
-    assert(in_cset_state == G1CollectedHeap::InNeither,
-           err_msg("In_cset_state must be InNeither here, but is %d", in_cset_state));
+    assert(!in_cset_state.is_in_cset_or_humongous(),
+           err_msg("In_cset_state must be NotInCSet here, but is " CSETSTATE_FORMAT, in_cset_state.value()));
   }

   assert(obj != NULL, "Must be");
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -78,9 +78,8 @@
     _cards_scanned(NULL), _total_cards_scanned(0),
     _prev_period_summary()
 {
-  _seq_task = new SubTasksDone(NumSeqTasks);
   guarantee(n_workers() > 0, "There should be some workers");
-  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers(), mtGC);
+  _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC);
   for (uint i = 0; i < n_workers(); i++) {
     _cset_rs_update_cl[i] = NULL;
   }
@@ -90,11 +89,10 @@
 }

 G1RemSet::~G1RemSet() {
-  delete _seq_task;
   for (uint i = 0; i < n_workers(); i++) {
     assert(_cset_rs_update_cl[i] == NULL, "it should be");
   }
-  FREE_C_HEAP_ARRAY(OopsInHeapRegionClosure*, _cset_rs_update_cl, mtGC);
+  FREE_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, _cset_rs_update_cl, mtGC);
 }

 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@@ -108,7 +106,7 @@
   size_t _cards_done, _cards;
   G1CollectedHeap* _g1h;

-  OopsInHeapRegionClosure* _oc;
+  G1ParPushHeapRSClosure* _oc;
   CodeBlobClosure* _code_root_cl;

   G1BlockOffsetSharedArray* _bot_shared;
@@ -120,7 +118,7 @@
   bool   _try_claimed;

 public:
-  ScanRSClosure(OopsInHeapRegionClosure* oc,
+  ScanRSClosure(G1ParPushHeapRSClosure* oc,
                 CodeBlobClosure* code_root_cl,
                 uint worker_i) :
     _oc(oc),
@@ -142,16 +140,13 @@
   void scanCard(size_t index, HeapRegion *r) {
     // Stack allocate the DirtyCardToOopClosure instance
     HeapRegionDCTOC cl(_g1h, r, _oc,
-                       CardTableModRefBS::Precise,
-                       HeapRegionDCTOC::IntoCSFilterKind);
+                       CardTableModRefBS::Precise);

     // Set the "from" region in the closure.
     _oc->set_region(r);
-    HeapWord* card_start = _bot_shared->address_for_index(index);
-    HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
-    Space *sp = SharedHeap::heap()->space_containing(card_start);
-    MemRegion sm_region = sp->used_region_at_save_marks();
-    MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
+    MemRegion card_region(_bot_shared->address_for_index(index), G1BlockOffsetSharedArray::N_words);
+    MemRegion pre_gc_allocated(r->bottom(), r->scan_top());
+    MemRegion mr = pre_gc_allocated.intersection(card_region);
     if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) {
       // We make the card as "claimed" lazily (so races are possible
       // but they're benign), which reduces the number of duplicate
@@ -240,7 +235,7 @@
   size_t cards_looked_up() { return _cards;}
 };

-void G1RemSet::scanRS(OopsInHeapRegionClosure* oc,
+void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
                       CodeBlobClosure* code_root_cl,
                       uint worker_i) {
   double rs_time_start = os::elapsedTime();
@@ -258,9 +253,8 @@
   assert(_cards_scanned != NULL, "invariant");
   _cards_scanned[worker_i] = scanRScl.cards_done();

-  _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
-  _g1p->phase_times()->record_strong_code_root_scan_time(worker_i,
-                                                         scanRScl.strong_code_root_scan_time_sec() * 1000.0);
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
+  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
 }

 // Closure used for updating RSets and recording references that
@@ -297,29 +291,18 @@
 };

 void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
-  double start = os::elapsedTime();
+  G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   // Apply the given closure to all remaining log entries.
   RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);

   _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
-
-  // Now there should be no dirty cards.
-  if (G1RSLogCheckCardTable) {
-    CountNonCleanMemRegionClosure cl(_g1);
-    _ct_bs->mod_card_iterate(&cl);
-    // XXX This isn't true any more: keeping cards of young regions
-    // marked dirty broke it.  Need some reasonable fix.
-    guarantee(cl.n() == 0, "Card table should be clean.");
-  }
-
-  _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
 }

 void G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }

-void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
+void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
                                            CodeBlobClosure* code_root_cl,
                                            uint worker_i) {
 #if CARD_REPEAT_HISTO
@@ -344,23 +327,8 @@

   assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");

-  // The two flags below were introduced temporarily to serialize
-  // the updating and scanning of remembered sets. There are some
-  // race conditions when these two operations are done in parallel
-  // and they are causing failures. When we resolve said race
-  // conditions, we'll revert back to parallel remembered set
-  // updating and scanning. See CRs 6677707 and 6677708.
-  if (G1UseParallelRSetUpdating || (worker_i == 0)) {
-    updateRS(&into_cset_dcq, worker_i);
-  } else {
-    _g1p->phase_times()->record_update_rs_processed_buffers(worker_i, 0);
-    _g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
-  }
-  if (G1UseParallelRSetScanning || (worker_i == 0)) {
-    scanRS(oc, code_root_cl, worker_i);
-  } else {
-    _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
-  }
+  updateRS(&into_cset_dcq, worker_i);
+  scanRS(oc, code_root_cl, worker_i);

   // We now clear the cached values of _cset_rs_update_cl for this worker
   _cset_rs_update_cl[worker_i] = NULL;
@@ -461,7 +429,7 @@
 G1UpdateRSOrPushRefOopClosure::
 G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
                               G1RemSet* rs,
-                              OopsInHeapRegionClosure* push_ref_cl,
+                              G1ParPushHeapRSClosure* push_ref_cl,
                               bool record_refs_into_cset,
                               uint worker_i) :
   _g1(g1h), _g1_rem_set(rs), _from(NULL),
@@ -562,7 +530,7 @@
   ct_freq_note_card(_ct_bs->index_for(start));
 #endif

-  OopsInHeapRegionClosure* oops_in_heap_closure = NULL;
+  G1ParPushHeapRSClosure* oops_in_heap_closure = NULL;
   if (check_for_refs_into_cset) {
     // ConcurrentG1RefineThreads have worker numbers larger than what
     // _cset_rs_update_cl[] is set up to handle. But those threads should
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -33,6 +33,7 @@
 class G1CollectedHeap;
 class CardTableModRefBarrierSet;
 class ConcurrentG1Refine;
+class G1ParPushHeapRSClosure;

 // A G1RemSet in which each heap region has a rem set that records the
 // external heap references into it.  Uses a mod ref bs to track updates,
@@ -58,7 +59,6 @@
   };

   CardTableModRefBS*     _ct_bs;
-  SubTasksDone*          _seq_task;
   G1CollectorPolicy*     _g1p;

   ConcurrentG1Refine*    _cg1r;
@@ -68,7 +68,7 @@

   // Used for caching the closure that is responsible for scanning
   // references into the collection set.
-  OopsInHeapRegionClosure** _cset_rs_update_cl;
+  G1ParPushHeapRSClosure** _cset_rs_update_cl;

   // Print the given summary info
   virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL);
@@ -95,7 +95,7 @@
   // partitioning the work to be done. It should be the same as
   // the "i" passed to the calling thread's work(i) function.
   // In the sequential case this param will be ignored.
-  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+  void oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
                                    CodeBlobClosure* code_root_cl,
                                    uint worker_i);

@@ -107,7 +107,7 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();

-  void scanRS(OopsInHeapRegionClosure* oc,
+  void scanRS(G1ParPushHeapRSClosure* oc,
               CodeBlobClosure* code_root_cl,
               uint worker_i);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1RootProcessor.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "classfile/symbolTable.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "code/codeCache.hpp"
+#include "gc_implementation/g1/bufferingOopClosure.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1GCPhaseTimes.hpp"
+#include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/g1RootProcessor.hpp"
+#include "memory/allocation.inline.hpp"
+#include "runtime/fprofiler.hpp"
+#include "runtime/mutex.hpp"
+#include "services/management.hpp"
+
+class G1CodeBlobClosure : public CodeBlobClosure {
+  class HeapRegionGatheringOopClosure : public OopClosure {
+    G1CollectedHeap* _g1h;
+    OopClosure* _work;
+    nmethod* _nm;
+
+    template <typename T>
+    void do_oop_work(T* p) {
+      _work->do_oop(p);
+      T oop_or_narrowoop = oopDesc::load_heap_oop(p);
+      if (!oopDesc::is_null(oop_or_narrowoop)) {
+        oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop);
+        HeapRegion* hr = _g1h->heap_region_containing_raw(o);
+        assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in CS then evacuation failed and nm must already be in the remset");
+        hr->add_strong_code_root(_nm);
+      }
+    }
+
+  public:
+    HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {}
+
+    void do_oop(oop* o) {
+      do_oop_work(o);
+    }
+
+    void do_oop(narrowOop* o) {
+      do_oop_work(o);
+    }
+
+    void set_nm(nmethod* nm) {
+      _nm = nm;
+    }
+  };
+
+  HeapRegionGatheringOopClosure _oc;
+public:
+  G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      if (!nm->test_set_oops_do_mark()) {
+        _oc.set_nm(nm);
+        nm->oops_do(&_oc);
+        nm->fix_oop_relocations();
+      }
+    }
+  }
+};
+
+
+void G1RootProcessor::worker_has_discovered_all_strong_classes() {
+  uint n_workers = _g1h->n_par_threads();
+  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
+
+  uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes);
+  if (new_value == n_workers) {
+    // This thread is last. Notify the others.
+    MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
+    _lock.notify_all();
+  }
+}
+
+void G1RootProcessor::wait_until_all_strong_classes_discovered() {
+  uint n_workers = _g1h->n_par_threads();
+  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
+
+  if ((uint)_n_workers_discovered_strong_classes != n_workers) {
+    MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
+    while ((uint)_n_workers_discovered_strong_classes != n_workers) {
+      _lock.wait(Mutex::_no_safepoint_check_flag, 0, false);
+    }
+  }
+}
+
+G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) :
+    _g1h(g1h),
+    _process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)),
+    _srs(g1h),
+    _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false),
+    _n_workers_discovered_strong_classes(0) {}
+
+void G1RootProcessor::evacuate_roots(OopClosure* scan_non_heap_roots,
+                                     OopClosure* scan_non_heap_weak_roots,
+                                     CLDClosure* scan_strong_clds,
+                                     CLDClosure* scan_weak_clds,
+                                     bool trace_metadata,
+                                     uint worker_i) {
+  // First scan the shared roots.
+  double ext_roots_start = os::elapsedTime();
+  G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
+
+  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
+  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
+
+  OopClosure* const weak_roots = &buf_scan_non_heap_weak_roots;
+  OopClosure* const strong_roots = &buf_scan_non_heap_roots;
+
+  // CodeBlobClosures are not interoperable with BufferingOopClosures
+  G1CodeBlobClosure root_code_blobs(scan_non_heap_roots);
+
+  process_java_roots(strong_roots,
+                     trace_metadata ? scan_strong_clds : NULL,
+                     scan_strong_clds,
+                     trace_metadata ? NULL : scan_weak_clds,
+                     &root_code_blobs,
+                     phase_times,
+                     worker_i);
+
+  // This is the point where this worker thread will not find more strong CLDs/nmethods.
+  // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
+  if (trace_metadata) {
+    worker_has_discovered_all_strong_classes();
+  }
+
+  process_vm_roots(strong_roots, weak_roots, phase_times, worker_i);
+
+  {
+    // Now the CM ref_processor roots.
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CMRefRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_refProcessor_oops_do)) {
+      // We need to treat the discovered reference lists of the
+      // concurrent mark ref processor as roots and keep entries
+      // (which are added by the marking threads) on them live
+      // until they can be processed at the end of marking.
+      _g1h->ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
+    }
+  }
+
+  if (trace_metadata) {
+    {
+      G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WaitForStrongCLD, worker_i);
+      // Barrier to make sure all workers passed
+      // the strong CLD and strong nmethods phases.
+      wait_until_all_strong_classes_discovered();
+    }
+
+    // Now take the complement of the strong CLDs.
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WeakCLDRoots, worker_i);
+    ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
+  } else {
+    phase_times->record_time_secs(G1GCPhaseTimes::WaitForStrongCLD, worker_i, 0.0);
+    phase_times->record_time_secs(G1GCPhaseTimes::WeakCLDRoots, worker_i, 0.0);
+  }
+
+  // Finish up any enqueued closure apps (attributed as object copy time).
+  buf_scan_non_heap_roots.done();
+  buf_scan_non_heap_weak_roots.done();
+
+  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
+      + buf_scan_non_heap_weak_roots.closure_app_seconds();
+
+  phase_times->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec);
+
+  double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec;
+
+  phase_times->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec);
+
+  // During conc marking we have to filter the per-thread SATB buffers
+  // to make sure we remove any oops into the CSet (which will show up
+  // as implicitly live).
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SATBFiltering, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_filter_satb_buffers) && _g1h->mark_in_progress()) {
+      JavaThread::satb_mark_queue_set().filter_thread_buffers();
+    }
+  }
+
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void G1RootProcessor::process_strong_roots(OopClosure* oops,
+                                           CLDClosure* clds,
+                                           CodeBlobClosure* blobs) {
+
+  process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0);
+  process_vm_roots(oops, NULL, NULL, 0);
+
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void G1RootProcessor::process_all_roots(OopClosure* oops,
+                                        CLDClosure* clds,
+                                        CodeBlobClosure* blobs) {
+
+  process_java_roots(oops, NULL, clds, clds, NULL, NULL, 0);
+  process_vm_roots(oops, oops, NULL, 0);
+
+  if (!_process_strong_tasks->is_task_claimed(G1RP_PS_CodeCache_oops_do)) {
+    CodeCache::blobs_do(blobs);
+  }
+
+  _process_strong_tasks->all_tasks_completed();
+}
+
+void G1RootProcessor::process_java_roots(OopClosure* strong_roots,
+                                         CLDClosure* thread_stack_clds,
+                                         CLDClosure* strong_clds,
+                                         CLDClosure* weak_clds,
+                                         CodeBlobClosure* strong_code,
+                                         G1GCPhaseTimes* phase_times,
+                                         uint worker_i) {
+  assert(thread_stack_clds == NULL || weak_clds == NULL, "There is overlap between those, only one may be set");
+  // Iterating over the CLDG and the Threads are done early to allow us to
+  // first process the strong CLDs and nmethods and then, after a barrier,
+  // let the thread process the weak CLDs and nmethods.
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CLDGRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_ClassLoaderDataGraph_oops_do)) {
+      ClassLoaderDataGraph::roots_cld_do(strong_clds, weak_clds);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i);
+    Threads::possibly_parallel_oops_do(strong_roots, thread_stack_clds, strong_code);
+  }
+}
+
+void G1RootProcessor::process_vm_roots(OopClosure* strong_roots,
+                                       OopClosure* weak_roots,
+                                       G1GCPhaseTimes* phase_times,
+                                       uint worker_i) {
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::UniverseRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Universe_oops_do)) {
+      Universe::oops_do(strong_roots);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JNIRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_JNIHandles_oops_do)) {
+      JNIHandles::oops_do(strong_roots);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ObjectSynchronizerRoots, worker_i);
+    if (!_process_strong_tasks-> is_task_claimed(G1RP_PS_ObjectSynchronizer_oops_do)) {
+      ObjectSynchronizer::oops_do(strong_roots);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::FlatProfilerRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_FlatProfiler_oops_do)) {
+      FlatProfiler::oops_do(strong_roots);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ManagementRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_Management_oops_do)) {
+      Management::oops_do(strong_roots);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::JVMTIRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_jvmti_oops_do)) {
+      JvmtiExport::oops_do(strong_roots);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::SystemDictionaryRoots, worker_i);
+    if (!_process_strong_tasks->is_task_claimed(G1RP_PS_SystemDictionary_oops_do)) {
+      SystemDictionary::roots_oops_do(strong_roots, weak_roots);
+    }
+  }
+
+  {
+    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::StringTableRoots, worker_i);
+    // All threads execute the following. A specific chunk of buckets
+    // from the StringTable are the individual tasks.
+    if (weak_roots != NULL) {
+      StringTable::possibly_parallel_oops_do(weak_roots);
+    }
+  }
+}
+
+void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
+                                           OopClosure* scan_non_heap_weak_roots,
+                                           uint worker_i) {
+  G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
+  G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CodeCacheRoots, worker_i);
+
+  // Now scan the complement of the collection set.
+  G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
+
+  _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
+}
+
+void G1RootProcessor::set_num_workers(int active_workers) {
+  _process_strong_tasks->set_n_threads(active_workers);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1RootProcessor.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/sharedHeap.hpp"
+#include "runtime/mutex.hpp"
+
+class CLDClosure;
+class CodeBlobClosure;
+class G1CollectedHeap;
+class G1GCPhaseTimes;
+class G1ParPushHeapRSClosure;
+class Monitor;
+class OopClosure;
+class SubTasksDone;
+
+// Scoped object to assist in applying oop, CLD and code blob closures to
+// root locations. Handles claiming of different root scanning tasks
+// and takes care of global state for root scanning via a StrongRootsScope.
+// In the parallel case there is a shared G1RootProcessor object where all
+// worker thread call the process_roots methods.
+class G1RootProcessor : public StackObj {
+  G1CollectedHeap* _g1h;
+  SubTasksDone* _process_strong_tasks;
+  SharedHeap::StrongRootsScope _srs;
+
+  // Used to implement the Thread work barrier.
+  Monitor _lock;
+  volatile jint _n_workers_discovered_strong_classes;
+
+  enum G1H_process_roots_tasks {
+    G1RP_PS_Universe_oops_do,
+    G1RP_PS_JNIHandles_oops_do,
+    G1RP_PS_ObjectSynchronizer_oops_do,
+    G1RP_PS_FlatProfiler_oops_do,
+    G1RP_PS_Management_oops_do,
+    G1RP_PS_SystemDictionary_oops_do,
+    G1RP_PS_ClassLoaderDataGraph_oops_do,
+    G1RP_PS_jvmti_oops_do,
+    G1RP_PS_CodeCache_oops_do,
+    G1RP_PS_filter_satb_buffers,
+    G1RP_PS_refProcessor_oops_do,
+    // Leave this one last.
+    G1RP_PS_NumElements
+  };
+
+  void worker_has_discovered_all_strong_classes();
+  void wait_until_all_strong_classes_discovered();
+
+  void process_java_roots(OopClosure* scan_non_heap_roots,
+                          CLDClosure* thread_stack_clds,
+                          CLDClosure* scan_strong_clds,
+                          CLDClosure* scan_weak_clds,
+                          CodeBlobClosure* scan_strong_code,
+                          G1GCPhaseTimes* phase_times,
+                          uint worker_i);
+
+  void process_vm_roots(OopClosure* scan_non_heap_roots,
+                        OopClosure* scan_non_heap_weak_roots,
+                        G1GCPhaseTimes* phase_times,
+                        uint worker_i);
+
+public:
+  G1RootProcessor(G1CollectedHeap* g1h);
+
+  // Apply closures to the strongly and weakly reachable roots in the system
+  // in a single pass.
+  // Record and report timing measurements for sub phases using the worker_i
+  void evacuate_roots(OopClosure* scan_non_heap_roots,
+                      OopClosure* scan_non_heap_weak_roots,
+                      CLDClosure* scan_strong_clds,
+                      CLDClosure* scan_weak_clds,
+                      bool trace_metadata,
+                      uint worker_i);
+
+  // Apply oops, clds and blobs to all strongly reachable roots in the system
+  void process_strong_roots(OopClosure* oops,
+                            CLDClosure* clds,
+                            CodeBlobClosure* blobs);
+
+  // Apply oops, clds and blobs to strongly and weakly reachable roots in the system
+  void process_all_roots(OopClosure* oops,
+                         CLDClosure* clds,
+                         CodeBlobClosure* blobs);
+
+  // Apply scan_rs to all locations in the union of the remembered sets for all
+  // regions in the collection set
+  // (having done "set_region" to indicate the region in which the root resides),
+  void scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
+                            OopClosure* scan_non_heap_weak_roots,
+                            uint worker_i);
+
+  // Inform the root processor about the number of worker threads
+  void set_num_workers(int active_workers);
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
--- a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -105,7 +105,7 @@

 void G1StringDedup::oops_do(OopClosure* keep_alive) {
   assert(is_enabled(), "String deduplication not enabled");
-  unlink_or_oops_do(NULL, keep_alive);
+  unlink_or_oops_do(NULL, keep_alive, true /* allow_resize_and_rehash */);
 }

 void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
@@ -122,37 +122,35 @@
 class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask {
 private:
   G1StringDedupUnlinkOrOopsDoClosure _cl;
+  G1GCPhaseTimes* _phase_times;

 public:
   G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive,
                                   OopClosure* keep_alive,
-                                  bool allow_resize_and_rehash) :
+                                  bool allow_resize_and_rehash,
+                                  G1GCPhaseTimes* phase_times) :
     AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"),
-    _cl(is_alive, keep_alive, allow_resize_and_rehash) {
-  }
+    _cl(is_alive, keep_alive, allow_resize_and_rehash), _phase_times(phase_times) { }

   virtual void work(uint worker_id) {
-    double queue_fixup_start = os::elapsedTime();
-    G1StringDedupQueue::unlink_or_oops_do(&_cl);
-
-    double table_fixup_start = os::elapsedTime();
-    G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
-
-    double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0;
-    double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0;
-    G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
-    g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms);
-    g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms);
+    {
+      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupQueueFixup, worker_id);
+      G1StringDedupQueue::unlink_or_oops_do(&_cl);
+    }
+    {
+      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupTableFixup, worker_id);
+      G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
+    }
   }
 };

-void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) {
+void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive,
+                                      OopClosure* keep_alive,
+                                      bool allow_resize_and_rehash,
+                                      G1GCPhaseTimes* phase_times) {
   assert(is_enabled(), "String deduplication not enabled");
-  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
-  g1p->phase_times()->note_string_dedup_fixup_start();
-  double fixup_start = os::elapsedTime();

-  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash);
+  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
     g1h->set_par_threads();
@@ -161,10 +159,6 @@
   } else {
     task.work(0);
   }
-
-  double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
-  g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
-  g1p->phase_times()->note_string_dedup_fixup_end();
 }

 void G1StringDedup::threads_do(ThreadClosure* tc) {
--- a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -90,6 +90,7 @@
 class ThreadClosure;
 class outputStream;
 class G1StringDedupTable;
+class G1GCPhaseTimes;

 //
 // Main interface for interacting with string deduplication.
@@ -130,7 +131,7 @@
   static void oops_do(OopClosure* keep_alive);
   static void unlink(BoolObjectClosure* is_alive);
   static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive,
-                                bool allow_resize_and_rehash = true);
+                                bool allow_resize_and_rehash, G1GCPhaseTimes* phase_times = NULL);

   static void threads_do(ThreadClosure* tc);
   static void print_worker_threads_on(outputStream* st);
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -217,14 +217,6 @@
   product(uintx, G1HeapRegionSize, 0,                                       \
           "Size of the G1 regions.")                                        \
                                                                             \
-  experimental(bool, G1UseParallelRSetUpdating, true,                       \
-          "Enables the parallelization of remembered set updating "         \
-          "during evacuation pauses")                                       \
-                                                                            \
-  experimental(bool, G1UseParallelRSetScanning, true,                       \
-          "Enables the parallelization of remembered set scanning "         \
-          "during evacuation pauses")                                       \
-                                                                            \
   product(uintx, G1ConcRefinementThreads, 0,                                \
           "If non-0 is the number of parallel rem set update threads, "     \
           "otherwise the value is determined ergonomically.")               \
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,93 +47,55 @@
 size_t HeapRegion::CardsPerRegion    = 0;

 HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1,
-                                 HeapRegion* hr, ExtendedOopClosure* cl,
-                                 CardTableModRefBS::PrecisionStyle precision,
-                                 FilterKind fk) :
+                                 HeapRegion* hr,
+                                 G1ParPushHeapRSClosure* cl,
+                                 CardTableModRefBS::PrecisionStyle precision) :
   DirtyCardToOopClosure(hr, cl, precision, NULL),
-  _hr(hr), _fk(fk), _g1(g1) { }
+  _hr(hr), _rs_scan(cl), _g1(g1) { }

 FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
                                                    OopClosure* oc) :
   _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { }

-template<class ClosureType>
-HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
-                               HeapRegion* hr,
-                               HeapWord* cur, HeapWord* top) {
-  oop cur_oop = oop(cur);
-  size_t oop_size = hr->block_size(cur);
-  HeapWord* next_obj = cur + oop_size;
-  while (next_obj < top) {
-    // Keep filtering the remembered set.
-    if (!g1h->is_obj_dead(cur_oop, hr)) {
-      // Bottom lies entirely below top, so we can call the
-      // non-memRegion version of oop_iterate below.
-      cur_oop->oop_iterate(cl);
-    }
-    cur = next_obj;
-    cur_oop = oop(cur);
-    oop_size = hr->block_size(cur);
-    next_obj = cur + oop_size;
-  }
-  return cur;
-}
-
 void HeapRegionDCTOC::walk_mem_region(MemRegion mr,
                                       HeapWord* bottom,
                                       HeapWord* top) {
   G1CollectedHeap* g1h = _g1;
   size_t oop_size;
-  ExtendedOopClosure* cl2 = NULL;
-
-  FilterIntoCSClosure intoCSFilt(this, g1h, _cl);
-  FilterOutOfRegionClosure outOfRegionFilt(_hr, _cl);
-
-  switch (_fk) {
-  case NoFilterKind:          cl2 = _cl; break;
-  case IntoCSFilterKind:      cl2 = &intoCSFilt; break;
-  case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
-  default:                    ShouldNotReachHere();
-  }
+  HeapWord* cur = bottom;

   // Start filtering what we add to the remembered set. If the object is
   // not considered dead, either because it is marked (in the mark bitmap)
   // or it was allocated after marking finished, then we add it. Otherwise
   // we can safely ignore the object.
-  if (!g1h->is_obj_dead(oop(bottom), _hr)) {
-    oop_size = oop(bottom)->oop_iterate(cl2, mr);
+  if (!g1h->is_obj_dead(oop(cur), _hr)) {
+    oop_size = oop(cur)->oop_iterate(_rs_scan, mr);
   } else {
-    oop_size = _hr->block_size(bottom);
+    oop_size = _hr->block_size(cur);
   }

-  bottom += oop_size;
-
-  if (bottom < top) {
-    // We replicate the loop below for several kinds of possible filters.
-    switch (_fk) {
-    case NoFilterKind:
-      bottom = walk_mem_region_loop(_cl, g1h, _hr, bottom, top);
-      break;
+  cur += oop_size;

-    case IntoCSFilterKind: {
-      FilterIntoCSClosure filt(this, g1h, _cl);
-      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
-      break;
-    }
-
-    case OutOfRegionFilterKind: {
-      FilterOutOfRegionClosure filt(_hr, _cl);
-      bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
-      break;
-    }
-
-    default:
-      ShouldNotReachHere();
+  if (cur < top) {
+    oop cur_oop = oop(cur);
+    oop_size = _hr->block_size(cur);
+    HeapWord* next_obj = cur + oop_size;
+    while (next_obj < top) {
+      // Keep filtering the remembered set.
+      if (!g1h->is_obj_dead(cur_oop, _hr)) {
+        // Bottom lies entirely below top, so we can call the
+        // non-memRegion version of oop_iterate below.
+        cur_oop->oop_iterate(_rs_scan);
+      }
+      cur = next_obj;
+      cur_oop = oop(cur);
+      oop_size = _hr->block_size(cur);
+      next_obj = cur + oop_size;
     }

     // Last object. Need to do dead-obj filtering here too.
-    if (!g1h->is_obj_dead(oop(bottom), _hr)) {
-      oop(bottom)->oop_iterate(cl2, mr);
+    if (!g1h->is_obj_dead(oop(cur), _hr)) {
+      oop(cur)->oop_iterate(_rs_scan, mr);
     }
   }
 }
@@ -338,7 +300,7 @@
   _orig_end = mr.end();
   hr_clear(false /*par*/, false /*clear_space*/);
   set_top(bottom());
-  record_top_and_timestamp();
+  record_timestamp();
 }

 CompactibleSpace* HeapRegion::next_compaction_space() const {
@@ -426,9 +388,9 @@

   // If we're within a stop-world GC, then we might look at a card in a
   // GC alloc region that extends onto a GC LAB, which may not be
-  // parseable.  Stop such at the "saved_mark" of the region.
+  // parseable.  Stop such at the "scan_top" of the region.
   if (g1h->is_gc_active()) {
-    mr = mr.intersection(used_region_at_save_marks());
+    mr = mr.intersection(MemRegion(bottom(), scan_top()));
   } else {
     mr = mr.intersection(used_region());
   }
@@ -468,7 +430,7 @@
   oop obj;

   HeapWord* next = cur;
-  while (next <= start) {
+  do {
     cur = next;
     obj = oop(cur);
     if (obj->klass_or_null() == NULL) {
@@ -477,45 +439,38 @@
     }
     // Otherwise...
     next = cur + block_size(cur);
-  }
+  } while (next <= start);

   // If we finish the above loop...We have a parseable object that
   // begins on or before the start of the memory region, and ends
   // inside or spans the entire region.
-
-  assert(obj == oop(cur), "sanity");
   assert(cur <= start, "Loop postcondition");
   assert(obj->klass_or_null() != NULL, "Loop postcondition");
-  assert((cur + block_size(cur)) > start, "Loop postcondition");

-  if (!g1h->is_obj_dead(obj)) {
-    obj->oop_iterate(cl, mr);
-  }
-
-  while (cur < end) {
+  do {
     obj = oop(cur);
+    assert((cur + block_size(cur)) > (HeapWord*)obj, "Loop invariant");
     if (obj->klass_or_null() == NULL) {
       // Ran into an unparseable point.
       return cur;
-    };
+    }

-    // Otherwise:
-    next = cur + block_size(cur);
+    // Advance the current pointer. "obj" still points to the object to iterate.
+    cur = cur + block_size(cur);

     if (!g1h->is_obj_dead(obj)) {
-      if (next < end || !obj->is_objArray()) {
-        // This object either does not span the MemRegion
-        // boundary, or if it does it's not an array.
-        // Apply closure to whole object.
+      // Non-objArrays are sometimes marked imprecise at the object start. We
+      // always need to iterate over them in full.
+      // We only iterate over object arrays in full if they are completely contained
+      // in the memory region.
+      if (!obj->is_objArray() || (((HeapWord*)obj) >= start && cur <= end)) {
         obj->oop_iterate(cl);
       } else {
-        // This obj is an array that spans the boundary.
-        // Stop at the boundary.
         obj->oop_iterate(cl, mr);
       }
     }
-    cur = next;
-  }
+  } while (cur < end);
+
   return NULL;
 }

@@ -980,7 +935,7 @@

 void G1OffsetTableContigSpace::clear(bool mangle_space) {
   set_top(bottom());
-  set_saved_mark_word(bottom());
+  _scan_top = bottom();
   CompactibleSpace::clear(mangle_space);
   reset_bot();
 }
@@ -1012,41 +967,42 @@
   return _offsets.threshold();
 }

-HeapWord* G1OffsetTableContigSpace::saved_mark_word() const {
+HeapWord* G1OffsetTableContigSpace::scan_top() const {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" );
   HeapWord* local_top = top();
   OrderAccess::loadload();
-  if (_gc_time_stamp < g1h->get_gc_time_stamp()) {
+  const unsigned local_time_stamp = _gc_time_stamp;
+  assert(local_time_stamp <= g1h->get_gc_time_stamp(), "invariant");
+  if (local_time_stamp < g1h->get_gc_time_stamp()) {
     return local_top;
   } else {
-    return Space::saved_mark_word();
+    return _scan_top;
   }
 }

-void G1OffsetTableContigSpace::record_top_and_timestamp() {
+void G1OffsetTableContigSpace::record_timestamp() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();

   if (_gc_time_stamp < curr_gc_time_stamp) {
-    // The order of these is important, as another thread might be
-    // about to start scanning this region. If it does so after
-    // set_saved_mark and before _gc_time_stamp = ..., then the latter
-    // will be false, and it will pick up top() as the high water mark
-    // of region. If it does so after _gc_time_stamp = ..., then it
-    // will pick up the right saved_mark_word() as the high water mark
-    // of the region. Either way, the behaviour will be correct.
-    Space::set_saved_mark_word(top());
-    OrderAccess::storestore();
+    // Setting the time stamp here tells concurrent readers to look at
+    // scan_top to know the maximum allowed address to look at.
+
+    // scan_top should be bottom for all regions except for the
+    // retained old alloc region which should have scan_top == top
+    HeapWord* st = _scan_top;
+    guarantee(st == _bottom || st == _top, "invariant");
+
     _gc_time_stamp = curr_gc_time_stamp;
-    // No need to do another barrier to flush the writes above. If
-    // this is called in parallel with other threads trying to
-    // allocate into the region, the caller should call this while
-    // holding a lock and when the lock is released the writes will be
-    // flushed.
   }
 }

+void G1OffsetTableContigSpace::record_retained_region() {
+  // scan_top is the maximum address where it's safe for the next gc to
+  // scan this region.
+  _scan_top = top();
+}
+
 void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) {
   object_iterate(blk);
 }
@@ -1080,6 +1036,8 @@
 void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
   CompactibleSpace::initialize(mr, clear_space, mangle_space);
   _top = bottom();
+  _scan_top = bottom();
+  set_saved_mark_word(NULL);
   reset_bot();
 }
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -67,17 +67,9 @@
 // sets.

 class HeapRegionDCTOC : public DirtyCardToOopClosure {
-public:
-  // Specification of possible DirtyCardToOopClosure filtering.
-  enum FilterKind {
-    NoFilterKind,
-    IntoCSFilterKind,
-    OutOfRegionFilterKind
-  };
-
-protected:
+private:
   HeapRegion* _hr;
-  FilterKind _fk;
+  G1ParPushHeapRSClosure* _rs_scan;
   G1CollectedHeap* _g1;

   // Walk the given memory region from bottom to (actual) top
@@ -90,9 +82,9 @@

 public:
   HeapRegionDCTOC(G1CollectedHeap* g1,
-                  HeapRegion* hr, ExtendedOopClosure* cl,
-                  CardTableModRefBS::PrecisionStyle precision,
-                  FilterKind fk);
+                  HeapRegion* hr,
+                  G1ParPushHeapRSClosure* cl,
+                  CardTableModRefBS::PrecisionStyle precision);
 };

 // The complicating factor is that BlockOffsetTable diverged
@@ -101,28 +93,25 @@
 // OffsetTableContigSpace.  If the two versions of BlockOffsetTable could
 // be reconciled, then G1OffsetTableContigSpace could go away.

-// The idea behind time stamps is the following. Doing a save_marks on
-// all regions at every GC pause is time consuming (if I remember
-// well, 10ms or so). So, we would like to do that only for regions
-// that are GC alloc regions. To achieve this, we use time
-// stamps. For every evacuation pause, G1CollectedHeap generates a
-// unique time stamp (essentially a counter that gets
-// incremented). Every time we want to call save_marks on a region,
-// we set the saved_mark_word to top and also copy the current GC
-// time stamp to the time stamp field of the space. Reading the
-// saved_mark_word involves checking the time stamp of the
-// region. If it is the same as the current GC time stamp, then we
-// can safely read the saved_mark_word field, as it is valid. If the
-// time stamp of the region is not the same as the current GC time
-// stamp, then we instead read top, as the saved_mark_word field is
-// invalid. Time stamps (on the regions and also on the
-// G1CollectedHeap) are reset at every cleanup (we iterate over
-// the regions anyway) and at the end of a Full GC. The current scheme
-// that uses sequential unsigned ints will fail only if we have 4b
+// The idea behind time stamps is the following. We want to keep track of
+// the highest address where it's safe to scan objects for each region.
+// This is only relevant for current GC alloc regions so we keep a time stamp
+// per region to determine if the region has been allocated during the current
+// GC or not. If the time stamp is current we report a scan_top value which
+// was saved at the end of the previous GC for retained alloc regions and which is
+// equal to the bottom for all other regions.
+// There is a race between card scanners and allocating gc workers where we must ensure
+// that card scanners do not read the memory allocated by the gc workers.
+// In order to enforce that, we must not return a value of _top which is more recent than the
+// time stamp. This is due to the fact that a region may become a gc alloc region at
+// some point after we've read the timestamp value as being < the current time stamp.
+// The time stamps are re-initialized to zero at cleanup and at Full GCs.
+// The current scheme that uses sequential unsigned ints will fail only if we have 4b
 // evacuation pauses between two cleanups, which is _highly_ unlikely.
 class G1OffsetTableContigSpace: public CompactibleSpace {
   friend class VMStructs;
   HeapWord* _top;
+  HeapWord* volatile _scan_top;
  protected:
   G1BlockOffsetArrayContigSpace _offsets;
   Mutex _par_alloc_lock;
@@ -166,10 +155,11 @@
   void set_bottom(HeapWord* value);
   void set_end(HeapWord* value);

-  virtual HeapWord* saved_mark_word() const;
-  void record_top_and_timestamp();
+  HeapWord* scan_top() const;
+  void record_timestamp();
   void reset_gc_time_stamp() { _gc_time_stamp = 0; }
   unsigned get_gc_time_stamp() { return _gc_time_stamp; }
+  void record_retained_region();

   // See the comment above in the declaration of _pre_dummy_top for an
   // explanation of what it is.
@@ -193,6 +183,8 @@
   virtual HeapWord* allocate(size_t word_size);
   HeapWord* par_allocate(size_t word_size);

+  HeapWord* saved_mark_word() const { ShouldNotReachHere(); return NULL; }
+
   // MarkSweep support phase3
   virtual HeapWord* initialize_threshold();
   virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end);
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -622,7 +622,7 @@
                          true,  // Process younger gens, if any,
                                 // as strong roots.
                          false, // no scope; this is parallel code
-                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::SO_ScavengeCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &par_scan_state.to_space_root_closure(),
                          &par_scan_state.older_gen_closure(),
--- a/src/share/vm/gc_implementation/shared/ageTable.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/gc_implementation/shared/ageTable.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -55,7 +55,10 @@

   // add entry
   void add(oop p, size_t oop_size) {
-    uint age = p->age();
+    add(p->age(), oop_size);
+  }
+
+  void add(uint age, size_t oop_size) {
     assert(age > 0 && age < table_size, "invalid age of object");
     sizes[age] += oop_size;
   }
--- a/src/share/vm/interpreter/bytecodes.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/interpreter/bytecodes.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -420,6 +420,7 @@
   static bool        is_astore      (Code code)    { return (code == _astore || code == _astore_0 || code == _astore_1
                                                                              || code == _astore_2 || code == _astore_3); }

+  static bool        is_store_into_local(Code code){ return (_istore <= code && code <= _astore_3); }
   static bool        is_const       (Code code)    { return (_aconst_null <= code && code <= _ldc2_w); }
   static bool        is_zero_const  (Code code)    { return (code == _aconst_null || code == _iconst_0
                                                            || code == _fconst_0 || code == _dconst_0); }
--- a/src/share/vm/interpreter/oopMapCache.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/interpreter/oopMapCache.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -244,10 +244,8 @@
   method()->print_value();
   tty->print(" @ %d = [%d] { ", bci(), n);
   for (int i = 0; i < n; i++) {
-#ifdef ENABLE_ZAP_DEAD_LOCALS
     if (is_dead(i)) tty->print("%d+ ", i);
     else
-#endif
     if (is_oop(i)) tty->print("%d ", i);
   }
   tty->print_cr("}");
@@ -402,13 +400,11 @@
       value |= (mask << oop_bit_number );
     }

-  #ifdef ENABLE_ZAP_DEAD_LOCALS
     // set dead bit
     if (!cell->is_live()) {
       value |= (mask << dead_bit_number);
       assert(!cell->is_reference(), "dead value marked as oop");
     }
-  #endif
   }

   // make sure last word is stored
--- a/src/share/vm/interpreter/oopMapCache.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/interpreter/oopMapCache.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -66,19 +66,15 @@

  public:
   enum {
-    N                = 2,                // the number of words reserved
+    N                = 4,                // the number of words reserved
                                          // for inlined mask storage
     small_mask_limit = N * BitsPerWord,  // the maximum number of bits
                                          // available for small masks,
                                          // small_mask_limit can be set to 0
                                          // for testing bit_mask allocation

-#ifdef ENABLE_ZAP_DEAD_LOCALS
     bits_per_entry   = 2,
     dead_bit_number  = 1,
-#else
-    bits_per_entry   = 1,
-#endif
     oop_bit_number   = 0
   };

@@ -119,10 +115,6 @@

   void set_expression_stack_size(int sz)         { _expression_stack_size = sz; }

-#ifdef ENABLE_ZAP_DEAD_LOCALS
-  bool is_dead(int offset) const                 { return (entry_at(offset) & (1 << dead_bit_number)) != 0; }
-#endif
-
   // Lookup
   bool match(methodHandle method, int bci) const { return _method == method() && _bci == bci; }
   bool is_empty() const;
@@ -144,6 +136,7 @@
   void print() const;

   int number_of_entries() const                  { return mask_size() / bits_per_entry; }
+  bool is_dead(int offset) const                 { return (entry_at(offset) & (1 << dead_bit_number)) != 0; }
   bool is_oop (int offset) const                 { return (entry_at(offset) & (1 << oop_bit_number )) != 0; }

   int expression_stack_size() const              { return _expression_stack_size; }
--- a/src/share/vm/memory/defNewGeneration.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/memory/defNewGeneration.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -629,7 +629,7 @@
                          true,  // Process younger gens, if any,
                                 // as strong roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::SO_ScavengeCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &fsc_with_no_gc_barrier,
                          &fsc_with_gc_barrier,
--- a/src/share/vm/memory/genCollectedHeap.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -26,6 +26,7 @@
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "code/codeCache.hpp"
 #include "code/icBuffer.hpp"
 #include "gc_implementation/shared/collectorCounters.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
@@ -49,6 +50,7 @@
 #include "runtime/handles.inline.hpp"
 #include "runtime/java.hpp"
 #include "runtime/vmThread.hpp"
+#include "services/management.hpp"
 #include "services/memoryService.hpp"
 #include "utilities/vmError.hpp"
 #include "utilities/workgroup.hpp"
@@ -63,7 +65,15 @@

 // The set of potentially parallel tasks in root scanning.
 enum GCH_strong_roots_tasks {
-  // We probably want to parallelize both of these internally, but for now...
+  GCH_PS_Universe_oops_do,
+  GCH_PS_JNIHandles_oops_do,
+  GCH_PS_ObjectSynchronizer_oops_do,
+  GCH_PS_FlatProfiler_oops_do,
+  GCH_PS_Management_oops_do,
+  GCH_PS_SystemDictionary_oops_do,
+  GCH_PS_ClassLoaderDataGraph_oops_do,
+  GCH_PS_jvmti_oops_do,
+  GCH_PS_CodeCache_oops_do,
   GCH_PS_younger_gens,
   // Leave this one last.
   GCH_PS_NumElements
@@ -72,13 +82,9 @@
 GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) :
   SharedHeap(policy),
   _gen_policy(policy),
-  _gen_process_roots_tasks(new SubTasksDone(GCH_PS_NumElements)),
+  _process_strong_tasks(new SubTasksDone(GCH_PS_NumElements)),
   _full_collections_completed(0)
 {
-  if (_gen_process_roots_tasks == NULL ||
-      !_gen_process_roots_tasks->valid()) {
-    vm_exit_during_initialization("Failed necessary allocation.");
-  }
   assert(policy != NULL, "Sanity check");
 }

@@ -589,29 +595,137 @@

 void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
-  _gen_process_roots_tasks->set_n_threads(t);
+  set_n_termination(t);
+}
+
+void GenCollectedHeap::set_n_termination(uint t) {
+  _process_strong_tasks->set_n_threads(t);
 }

-void GenCollectedHeap::
-gen_process_roots(int level,
-                  bool younger_gens_as_roots,
-                  bool activate_scope,
-                  SharedHeap::ScanningOption so,
-                  OopsInGenClosure* not_older_gens,
-                  OopsInGenClosure* weak_roots,
-                  OopsInGenClosure* older_gens,
-                  CLDClosure* cld_closure,
-                  CLDClosure* weak_cld_closure,
-                  CodeBlobClosure* code_closure) {
+#ifdef ASSERT
+class AssertNonScavengableClosure: public OopClosure {
+public:
+  virtual void do_oop(oop* p) {
+    assert(!Universe::heap()->is_in_partial_collection(*p),
+      "Referent should not be scavengable.");  }
+  virtual void do_oop(narrowOop* p) { ShouldNotReachHere(); }
+};
+static AssertNonScavengableClosure assert_is_non_scavengable_closure;
+#endif
+
+void GenCollectedHeap::process_roots(bool activate_scope,
+                                     ScanningOption so,
+                                     OopClosure* strong_roots,
+                                     OopClosure* weak_roots,
+                                     CLDClosure* strong_cld_closure,
+                                     CLDClosure* weak_cld_closure,
+                                     CodeBlobClosure* code_roots) {
+  StrongRootsScope srs(this, activate_scope);

   // General roots.
-  SharedHeap::process_roots(activate_scope, so,
-                            not_older_gens, weak_roots,
-                            cld_closure, weak_cld_closure,
-                            code_closure);
+  assert(_strong_roots_parity != 0, "must have called prologue code");
+  assert(code_roots != NULL, "code root closure should always be set");
+  // _n_termination for _process_strong_tasks should be set up stream
+  // in a method not running in a GC worker.  Otherwise the GC worker
+  // could be trying to change the termination condition while the task
+  // is executing in another GC worker.
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ClassLoaderDataGraph_oops_do)) {
+    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
+  }
+
+  // Some CLDs contained in the thread frames should be considered strong.
+  // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
+  CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
+  // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
+  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
+
+  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) {
+    Universe::oops_do(strong_roots);
+  }
+  // Global (strong) JNI handles
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_JNIHandles_oops_do)) {
+    JNIHandles::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_ObjectSynchronizer_oops_do)) {
+    ObjectSynchronizer::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_FlatProfiler_oops_do)) {
+    FlatProfiler::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_Management_oops_do)) {
+    Management::oops_do(strong_roots);
+  }
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_jvmti_oops_do)) {
+    JvmtiExport::oops_do(strong_roots);
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_SystemDictionary_oops_do)) {
+    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
+  }
+
+  // All threads execute the following. A specific chunk of buckets
+  // from the StringTable are the individual tasks.
+  if (weak_roots != NULL) {
+    if (CollectedHeap::use_parallel_gc_threads()) {
+      StringTable::possibly_parallel_oops_do(weak_roots);
+    } else {
+      StringTable::oops_do(weak_roots);
+    }
+  }
+
+  if (!_process_strong_tasks->is_task_claimed(GCH_PS_CodeCache_oops_do)) {
+    if (so & SO_ScavengeCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+
+      // We only visit parts of the CodeCache when scavenging.
+      CodeCache::scavenge_root_nmethods_do(code_roots);
+    }
+    if (so & SO_AllCodeCache) {
+      assert(code_roots != NULL, "must supply closure for code cache");
+
+      // CMSCollector uses this to do intermediate-strength collections.
+      // We scan the entire code cache, since CodeCache::do_unloading is not called.
+      CodeCache::blobs_do(code_roots);
+    }
+    // Verify that the code cache contents are not subject to
+    // movement by a scavenging collection.
+    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
+    DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
+  }
+
+}
+
+void GenCollectedHeap::gen_process_roots(int level,
+                                         bool younger_gens_as_roots,
+                                         bool activate_scope,
+                                         ScanningOption so,
+                                         bool only_strong_roots,
+                                         OopsInGenClosure* not_older_gens,
+                                         OopsInGenClosure* older_gens,
+                                         CLDClosure* cld_closure) {
+  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
+
+  bool is_moving_collection = false;
+  if (level == 0 || is_adjust_phase) {
+    // young collections are always moving
+    is_moving_collection = true;
+  }
+
+  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
+  OopsInGenClosure* weak_roots = only_strong_roots ? NULL : not_older_gens;
+  CLDClosure* weak_cld_closure = only_strong_roots ? NULL : cld_closure;
+
+  process_roots(activate_scope, so,
+                not_older_gens, weak_roots,
+                cld_closure, weak_cld_closure,
+                &mark_code_closure);

   if (younger_gens_as_roots) {
-    if (!_gen_process_roots_tasks->is_task_claimed(GCH_PS_younger_gens)) {
+    if (!_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
       for (int i = 0; i < level; i++) {
         not_older_gens->set_generation(_gens[i]);
         _gens[i]->oop_iterate(not_older_gens);
@@ -627,43 +741,18 @@
     older_gens->reset_generation();
   }

-  _gen_process_roots_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed();
 }

-void GenCollectedHeap::
-gen_process_roots(int level,
-                  bool younger_gens_as_roots,
-                  bool activate_scope,
-                  SharedHeap::ScanningOption so,
-                  bool only_strong_roots,
-                  OopsInGenClosure* not_older_gens,
-                  OopsInGenClosure* older_gens,
-                  CLDClosure* cld_closure) {

-  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
-
-  bool is_moving_collection = false;
-  if (level == 0 || is_adjust_phase) {
-    // young collections are always moving
-    is_moving_collection = true;
-  }
-
-  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
-  CodeBlobClosure* code_closure = &mark_code_closure;
-
-  gen_process_roots(level,
-                    younger_gens_as_roots,
-                    activate_scope, so,
-                    not_older_gens, only_strong_roots ? NULL : not_older_gens,
-                    older_gens,
-                    cld_closure, only_strong_roots ? NULL : cld_closure,
-                    code_closure);
-
-}
+class AlwaysTrueClosure: public BoolObjectClosure {
+public:
+  bool do_object_b(oop p) { return true; }
+};
+static AlwaysTrueClosure always_true;

 void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure) {
-  SharedHeap::process_weak_roots(root_closure);
-  // "Local" "weak" refs
+  JNIHandles::weak_oops_do(&always_true, root_closure);
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->ref_processor()->weak_oops_do(root_closure);
   }
--- a/src/share/vm/memory/genCollectedHeap.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -79,8 +79,7 @@

   // Data structure for claiming the (potentially) parallel tasks in
   // (gen-specific) roots processing.
-  SubTasksDone* _gen_process_roots_tasks;
-  SubTasksDone* gen_process_roots_tasks() { return _gen_process_roots_tasks; }
+  SubTasksDone* _process_strong_tasks;

   // In block contents verification, the number of header words to skip
   NOT_PRODUCT(static size_t _skip_header_HeapWords;)
@@ -390,6 +389,7 @@
   static GenCollectedHeap* heap();

   void set_par_threads(uint t);
+  void set_n_termination(uint t);

   // Invoke the "do_oop" method of one of the closures "not_older_gens"
   // or "older_gens" on root locations for the generation at
@@ -403,11 +403,25 @@
   // The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
+  enum ScanningOption {
+    SO_None                =  0x0,
+    SO_AllCodeCache        =  0x8,
+    SO_ScavengeCodeCache   = 0x10
+  };
+
  private:
+  void process_roots(bool activate_scope,
+                     ScanningOption so,
+                     OopClosure* strong_roots,
+                     OopClosure* weak_roots,
+                     CLDClosure* strong_cld_closure,
+                     CLDClosure* weak_cld_closure,
+                     CodeBlobClosure* code_roots);
+
   void gen_process_roots(int level,
                          bool younger_gens_as_roots,
                          bool activate_scope,
-                         SharedHeap::ScanningOption so,
+                         ScanningOption so,
                          OopsInGenClosure* not_older_gens,
                          OopsInGenClosure* weak_roots,
                          OopsInGenClosure* older_gens,
@@ -422,7 +436,7 @@
   void gen_process_roots(int level,
                          bool younger_gens_as_roots,
                          bool activate_scope,
-                         SharedHeap::ScanningOption so,
+                         ScanningOption so,
                          bool only_strong_roots,
                          OopsInGenClosure* not_older_gens,
                          OopsInGenClosure* older_gens,
--- a/src/share/vm/memory/genMarkSweep.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/memory/genMarkSweep.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -210,7 +210,7 @@
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_None,
+                         GenCollectedHeap::SO_None,
                          GenCollectedHeap::StrongRootsOnly,
                          &follow_root_closure,
                          &follow_root_closure,
@@ -295,7 +295,7 @@
   gch->gen_process_roots(level,
                          false, // Younger gens are not roots.
                          true,  // activate StrongRootsScope
-                         SharedHeap::SO_AllCodeCache,
+                         GenCollectedHeap::SO_AllCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &adjust_pointer_closure,
                          &adjust_pointer_closure,
--- a/src/share/vm/memory/sharedHeap.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/memory/sharedHeap.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -32,7 +32,6 @@
 #include "runtime/atomic.inline.hpp"
 #include "runtime/fprofiler.hpp"
 #include "runtime/java.hpp"
-#include "services/management.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/workgroup.hpp"

@@ -40,33 +39,13 @@

 SharedHeap* SharedHeap::_sh;

-// The set of potentially parallel tasks in root scanning.
-enum SH_process_roots_tasks {
-  SH_PS_Universe_oops_do,
-  SH_PS_JNIHandles_oops_do,
-  SH_PS_ObjectSynchronizer_oops_do,
-  SH_PS_FlatProfiler_oops_do,
-  SH_PS_Management_oops_do,
-  SH_PS_SystemDictionary_oops_do,
-  SH_PS_ClassLoaderDataGraph_oops_do,
-  SH_PS_jvmti_oops_do,
-  SH_PS_CodeCache_oops_do,
-  // Leave this one last.
-  SH_PS_NumElements
-};
-
 SharedHeap::SharedHeap(CollectorPolicy* policy_) :
   CollectedHeap(),
   _collector_policy(policy_),
   _rem_set(NULL),
-  _strong_roots_scope(NULL),
   _strong_roots_parity(0),
-  _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)),
   _workers(NULL)
 {
-  if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
-    vm_exit_during_initialization("Failed necessary allocation.");
-  }
   _sh = this;  // ch is static, should be set only once.
   if ((UseParNewGC ||
       (UseConcMarkSweepGC && (CMSParallelInitialMarkEnabled ||
@@ -84,14 +63,6 @@
   }
 }

-int SharedHeap::n_termination() {
-  return _process_strong_tasks->n_threads();
-}
-
-void SharedHeap::set_n_termination(int t) {
-  _process_strong_tasks->set_n_threads(t);
-}
-
 bool SharedHeap::heap_lock_held_for_gc() {
   Thread* t = Thread::current();
   return    Heap_lock->owned_by_self()
@@ -102,31 +73,6 @@
 void SharedHeap::set_par_threads(uint t) {
   assert(t == 0 || !UseSerialGC, "Cannot have parallel threads");
   _n_par_threads = t;
-  _process_strong_tasks->set_n_threads(t);
-}
-
-#ifdef ASSERT
-class AssertNonScavengableClosure: public OopClosure {
-public:
-  virtual void do_oop(oop* p) {
-    assert(!Universe::heap()->is_in_partial_collection(*p),
-      "Referent should not be scavengable.");  }
-  virtual void do_oop(narrowOop* p) { ShouldNotReachHere(); }
-};
-static AssertNonScavengableClosure assert_is_non_scavengable_closure;
-#endif
-
-SharedHeap::StrongRootsScope* SharedHeap::active_strong_roots_scope() const {
-  return _strong_roots_scope;
-}
-void SharedHeap::register_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
-  assert(_strong_roots_scope == NULL, "Should only have one StrongRootsScope active");
-  assert(scope != NULL, "Illegal argument");
-  _strong_roots_scope = scope;
-}
-void SharedHeap::unregister_strong_roots_scope(SharedHeap::StrongRootsScope* scope) {
-  assert(_strong_roots_scope == scope, "Wrong scope unregistered");
-  _strong_roots_scope = NULL;
 }

 void SharedHeap::change_strong_roots_parity() {
@@ -140,174 +86,15 @@
 }

 SharedHeap::StrongRootsScope::StrongRootsScope(SharedHeap* heap, bool activate)
-  : MarkScope(activate), _sh(heap), _n_workers_done_with_threads(0)
+  : MarkScope(activate), _sh(heap)
 {
   if (_active) {
-    _sh->register_strong_roots_scope(this);
     _sh->change_strong_roots_parity();
     // Zero the claimed high water mark in the StringTable
     StringTable::clear_parallel_claimed_index();
   }
 }

-SharedHeap::StrongRootsScope::~StrongRootsScope() {
-  if (_active) {
-    _sh->unregister_strong_roots_scope(this);
-  }
-}
-
-Monitor* SharedHeap::StrongRootsScope::_lock = new Monitor(Mutex::leaf, "StrongRootsScope lock", false);
-
-void SharedHeap::StrongRootsScope::mark_worker_done_with_threads(uint n_workers) {
-  // The Thread work barrier is only needed by G1 Class Unloading.
-  // No need to use the barrier if this is single-threaded code.
-  if (UseG1GC && ClassUnloadingWithConcurrentMark && n_workers > 0) {
-    uint new_value = (uint)Atomic::add(1, &_n_workers_done_with_threads);
-    if (new_value == n_workers) {
-      // This thread is last. Notify the others.
-      MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
-      _lock->notify_all();
-    }
-  }
-}
-
-void SharedHeap::StrongRootsScope::wait_until_all_workers_done_with_threads(uint n_workers) {
-  assert(UseG1GC,                          "Currently only used by G1");
-  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
-
-  // No need to use the barrier if this is single-threaded code.
-  if (n_workers > 0 && (uint)_n_workers_done_with_threads != n_workers) {
-    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
-    while ((uint)_n_workers_done_with_threads != n_workers) {
-      _lock->wait(Mutex::_no_safepoint_check_flag, 0, false);
-    }
-  }
-}
-
-void SharedHeap::process_roots(bool activate_scope,
-                               ScanningOption so,
-                               OopClosure* strong_roots,
-                               OopClosure* weak_roots,
-                               CLDClosure* strong_cld_closure,
-                               CLDClosure* weak_cld_closure,
-                               CodeBlobClosure* code_roots) {
-  StrongRootsScope srs(this, activate_scope);
-
-  // General roots.
-  assert(_strong_roots_parity != 0, "must have called prologue code");
-  assert(code_roots != NULL, "code root closure should always be set");
-  // _n_termination for _process_strong_tasks should be set up stream
-  // in a method not running in a GC worker.  Otherwise the GC worker
-  // could be trying to change the termination condition while the task
-  // is executing in another GC worker.
-
-  // Iterating over the CLDG and the Threads are done early to allow G1 to
-  // first process the strong CLDs and nmethods and then, after a barrier,
-  // let the thread process the weak CLDs and nmethods.
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_ClassLoaderDataGraph_oops_do)) {
-    ClassLoaderDataGraph::roots_cld_do(strong_cld_closure, weak_cld_closure);
-  }
-
-  // Some CLDs contained in the thread frames should be considered strong.
-  // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
-  CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
-  // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
-  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
-
-  Threads::possibly_parallel_oops_do(strong_roots, roots_from_clds_p, roots_from_code_p);
-
-  // This is the point where this worker thread will not find more strong CLDs/nmethods.
-  // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
-  active_strong_roots_scope()->mark_worker_done_with_threads(n_par_threads());
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) {
-    Universe::oops_do(strong_roots);
-  }
-  // Global (strong) JNI handles
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_JNIHandles_oops_do))
-    JNIHandles::oops_do(strong_roots);
-
-  if (!_process_strong_tasks-> is_task_claimed(SH_PS_ObjectSynchronizer_oops_do))
-    ObjectSynchronizer::oops_do(strong_roots);
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_FlatProfiler_oops_do))
-    FlatProfiler::oops_do(strong_roots);
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_Management_oops_do))
-    Management::oops_do(strong_roots);
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_jvmti_oops_do))
-    JvmtiExport::oops_do(strong_roots);
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) {
-    SystemDictionary::roots_oops_do(strong_roots, weak_roots);
-  }
-
-  // All threads execute the following. A specific chunk of buckets
-  // from the StringTable are the individual tasks.
-  if (weak_roots != NULL) {
-    if (CollectedHeap::use_parallel_gc_threads()) {
-      StringTable::possibly_parallel_oops_do(weak_roots);
-    } else {
-      StringTable::oops_do(weak_roots);
-    }
-  }
-
-  if (!_process_strong_tasks->is_task_claimed(SH_PS_CodeCache_oops_do)) {
-    if (so & SO_ScavengeCodeCache) {
-      assert(code_roots != NULL, "must supply closure for code cache");
-
-      // We only visit parts of the CodeCache when scavenging.
-      CodeCache::scavenge_root_nmethods_do(code_roots);
-    }
-    if (so & SO_AllCodeCache) {
-      assert(code_roots != NULL, "must supply closure for code cache");
-
-      // CMSCollector uses this to do intermediate-strength collections.
-      // We scan the entire code cache, since CodeCache::do_unloading is not called.
-      CodeCache::blobs_do(code_roots);
-    }
-    // Verify that the code cache contents are not subject to
-    // movement by a scavenging collection.
-    DEBUG_ONLY(CodeBlobToOopClosure assert_code_is_non_scavengable(&assert_is_non_scavengable_closure, !CodeBlobToOopClosure::FixRelocations));
-    DEBUG_ONLY(CodeCache::asserted_non_scavengable_nmethods_do(&assert_code_is_non_scavengable));
-  }
-
-  _process_strong_tasks->all_tasks_completed();
-}
-
-void SharedHeap::process_all_roots(bool activate_scope,
-                                   ScanningOption so,
-                                   OopClosure* roots,
-                                   CLDClosure* cld_closure,
-                                   CodeBlobClosure* code_closure) {
-  process_roots(activate_scope, so,
-                roots, roots,
-                cld_closure, cld_closure,
-                code_closure);
-}
-
-void SharedHeap::process_strong_roots(bool activate_scope,
-                                      ScanningOption so,
-                                      OopClosure* roots,
-                                      CLDClosure* cld_closure,
-                                      CodeBlobClosure* code_closure) {
-  process_roots(activate_scope, so,
-                roots, NULL,
-                cld_closure, NULL,
-                code_closure);
-}
-
-
-class AlwaysTrueClosure: public BoolObjectClosure {
-public:
-  bool do_object_b(oop p) { return true; }
-};
-static AlwaysTrueClosure always_true;
-
-void SharedHeap::process_weak_roots(OopClosure* root_closure) {
-  // Global (weak) JNI handles
-  JNIHandles::weak_oops_do(&always_true, root_closure);
-}
-
 void SharedHeap::set_barrier_set(BarrierSet* bs) {
   _barrier_set = bs;
   // Cached barrier set for fast access in oops
--- a/src/share/vm/memory/sharedHeap.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/memory/sharedHeap.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -61,18 +61,18 @@
 //    counts the number of tasks that have been done and then reset
 //    the SubTasksDone so that it can be used again.  When the number of
 //    tasks is set to the number of GC workers, then _n_threads must
-//    be set to the number of active GC workers. G1CollectedHeap,
-//    HRInto_G1RemSet, GenCollectedHeap and SharedHeap have SubTasksDone.
-//    This seems too many.
+//    be set to the number of active GC workers. G1RootProcessor and
+//    GenCollectedHeap have SubTasksDone.
 //    3) SequentialSubTasksDone has an _n_threads that is used in
 //    a way similar to SubTasksDone and has the same dependency on the
 //    number of active GC workers.  CompactibleFreeListSpace and Space
 //    have SequentialSubTasksDone's.
-// Example of using SubTasksDone and SequentialSubTasksDone
-// G1CollectedHeap::g1_process_roots()
-//  to SharedHeap::process_roots() and uses
-//  SubTasksDone* _process_strong_tasks to claim tasks.
-//  process_roots() calls
+//
+// Examples of using SubTasksDone and SequentialSubTasksDone:
+//  G1RootProcessor and GenCollectedHeap::process_roots() use
+//  SubTasksDone* _process_strong_tasks to claim tasks for workers
+//
+//  GenCollectedHeap::gen_process_roots() calls
 //      rem_set()->younger_refs_iterate()
 //  to scan the card table and which eventually calls down into
 //  CardTableModRefBS::par_non_clean_card_iterate_work().  This method
@@ -104,10 +104,6 @@
   friend class VM_GC_Operation;
   friend class VM_CGC_Operation;

-private:
-  // For claiming strong_roots tasks.
-  SubTasksDone* _process_strong_tasks;
-
 protected:
   // There should be only a single instance of "SharedHeap" in a program.
   // This is enforced with the protected constructor below, which will also
@@ -144,7 +140,6 @@
   static SharedHeap* heap() { return _sh; }

   void set_barrier_set(BarrierSet* bs);
-  SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }

   // Does operations required after initialization has been done.
   virtual void post_initialize();
@@ -201,69 +196,19 @@
   // strong_roots_prologue calls change_strong_roots_parity, if
   // parallel tasks are enabled.
   class StrongRootsScope : public MarkingCodeBlobClosure::MarkScope {
-    // Used to implement the Thread work barrier.
-    static Monitor* _lock;
-
     SharedHeap*   _sh;
-    volatile jint _n_workers_done_with_threads;

    public:
     StrongRootsScope(SharedHeap* heap, bool activate = true);
-    ~StrongRootsScope();
-
-    // Mark that this thread is done with the Threads work.
-    void mark_worker_done_with_threads(uint n_workers);
-    // Wait until all n_workers are done with the Threads work.
-    void wait_until_all_workers_done_with_threads(uint n_workers);
   };
   friend class StrongRootsScope;

-  // The current active StrongRootScope
-  StrongRootsScope* _strong_roots_scope;
-
-  StrongRootsScope* active_strong_roots_scope() const;
-
  private:
-  void register_strong_roots_scope(StrongRootsScope* scope);
-  void unregister_strong_roots_scope(StrongRootsScope* scope);
   void change_strong_roots_parity();

  public:
-  enum ScanningOption {
-    SO_None                =  0x0,
-    SO_AllCodeCache        =  0x8,
-    SO_ScavengeCodeCache   = 0x10
-  };
-
   FlexibleWorkGang* workers() const { return _workers; }

-  // Invoke the "do_oop" method the closure "roots" on all root locations.
-  // The "so" argument determines which roots the closure is applied to:
-  // "SO_None" does none;
-  // "SO_AllCodeCache" applies the closure to all elements of the CodeCache.
-  // "SO_ScavengeCodeCache" applies the closure to elements on the scavenge root list in the CodeCache.
-  void process_roots(bool activate_scope,
-                     ScanningOption so,
-                     OopClosure* strong_roots,
-                     OopClosure* weak_roots,
-                     CLDClosure* strong_cld_closure,
-                     CLDClosure* weak_cld_closure,
-                     CodeBlobClosure* code_roots);
-  void process_all_roots(bool activate_scope,
-                         ScanningOption so,
-                         OopClosure* roots,
-                         CLDClosure* cld_closure,
-                         CodeBlobClosure* code_roots);
-  void process_strong_roots(bool activate_scope,
-                            ScanningOption so,
-                            OopClosure* roots,
-                            CLDClosure* cld_closure,
-                            CodeBlobClosure* code_roots);
-
-
-  // Apply "root_closure" to the JNI weak roots..
-  void process_weak_roots(OopClosure* root_closure);
-
   // The functions below are helper functions that a subclass of
   // "SharedHeap" can use in the implementation of its virtual
   // functions.
@@ -278,9 +223,6 @@
   // (such as process roots) subsequently.
   virtual void set_par_threads(uint t);

-  int n_termination();
-  void set_n_termination(int t);
-
   //
   // New methods from CollectedHeap
   //
@@ -292,8 +234,4 @@
                              size_t capacity);
 };

-inline SharedHeap::ScanningOption operator|(SharedHeap::ScanningOption so0, SharedHeap::ScanningOption so1) {
-  return static_cast<SharedHeap::ScanningOption>(static_cast<int>(so0) | static_cast<int>(so1));
-}
-
 #endif // SHARE_VM_MEMORY_SHAREDHEAP_HPP
--- a/src/share/vm/prims/jvmtiImpl.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/prims/jvmtiImpl.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "interpreter/interpreter.hpp"
+#include "interpreter/oopMapCache.hpp"
 #include "jvmtifiles/jvmtiEnv.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/instanceKlass.hpp"
@@ -744,6 +745,13 @@
 }

 void VM_GetOrSetLocal::doit() {
+  InterpreterOopMap oop_mask;
+  _jvf->method()->mask_for(_jvf->bci(), &oop_mask);
+  if (oop_mask.is_dead(_index)) {
+    // The local can be invalid and uninitialized in the scope of current bci
+    _result = JVMTI_ERROR_INVALID_SLOT;
+    return;
+  }
   if (_set) {
     // Force deoptimization of frame if compiled because it's
     // possible the compiler emitted some locals as constant values,
--- a/src/share/vm/runtime/mutexLocker.cpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/runtime/mutexLocker.cpp	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -119,7 +119,6 @@
 Mutex*   OldSets_lock                 = NULL;
 Monitor* RootRegionScan_lock          = NULL;
 Mutex*   MMUTracker_lock              = NULL;
-Mutex*   HotCardCache_lock            = NULL;

 Monitor* GCTaskManager_lock           = NULL;

@@ -200,7 +199,6 @@
     def(OldSets_lock               , Mutex  , leaf     ,   true );
     def(RootRegionScan_lock        , Monitor, leaf     ,   true );
     def(MMUTracker_lock            , Mutex  , leaf     ,   true );
-    def(HotCardCache_lock          , Mutex  , special  ,   true );
     def(EvacFailureStack_lock      , Mutex  , nonleaf  ,   true );

     def(StringDedupQueue_lock      , Monitor, leaf,        true );
--- a/src/share/vm/runtime/mutexLocker.hpp	Wed Mar 25 11:31:54 2015 -0700
+++ b/src/share/vm/runtime/mutexLocker.hpp	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -137,7 +137,6 @@
 extern Monitor* RootRegionScan_lock;             // used to notify that the CM threads have finished scanning the IM snapshot regions
 extern Mutex*   MMUTracker_lock;                 // protects the MMU
                                                  // tracker data structures
-extern Mutex*   HotCardCache_lock;               // protects the hot card cache

 extern Mutex*   Management_lock;                 // a lock used to serialize JVM management
 extern Monitor* Service_lock;                    // a lock used for service thread operation
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/g1/TestG1TraceReclaimDeadHumongousObjectsAtYoungGC.java	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestG1TraceReclaimDeadHumongousObjectsAtYoungGC
+ * @bug 8058801
+ * @summary Ensure that the output for a G1TraceReclaimDeadHumongousObjectsAtYoungGC
+ * includes the expected necessary messages.
+ * @key gc
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import java.util.LinkedList;
+
+public class TestG1TraceReclaimDeadHumongousObjectsAtYoungGC {
+  public static void main(String[] args) throws Exception {
+    testGCLogs();
+    testHumongousObjectGCLogs();
+  }
+
+  private static void testGCLogs() throws Exception {
+
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-Xms128M",
+                                               "-Xmx128M",
+                                               "-Xmn16M",
+                                               "-XX:G1HeapRegionSize=1M",
+                                               "-XX:+PrintGC",
+                                               "-XX:+UnlockExperimentalVMOptions",
+                                               "-XX:G1LogLevel=finest",
+                                               "-XX:+G1TraceReclaimDeadHumongousObjectsAtYoungGC",
+                                               GCTest.class.getName());
+
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+    // As G1ReclaimDeadHumongousObjectsAtYoungGC is set(default), below logs should be displayed.
+    // And GCTest doesn't have humongous objects, so values should be zero.
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldContain("[Humongous Total: 0]");
+    output.shouldContain("[Humongous Candidate: 0]");
+    output.shouldContain("[Humongous Reclaimed: 0]");
+
+    output.shouldHaveExitValue(0);
+  }
+
+  private static void testHumongousObjectGCLogs() throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                               "-Xms128M",
+                                               "-Xmx128M",
+                                               "-Xmn16M",
+                                               "-XX:G1HeapRegionSize=1M",
+                                               "-XX:+PrintGC",
+                                               "-XX:+UnlockExperimentalVMOptions",
+                                               "-XX:G1LogLevel=finest",
+                                               "-XX:+G1TraceReclaimDeadHumongousObjectsAtYoungGC",
+                                               GCWithHumongousObjectTest.class.getName());
+
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+    // As G1ReclaimDeadHumongousObjectsAtYoungGC is set(default), below logs should be displayed.
+    output.shouldContain("[Humongous Reclaim");
+    output.shouldContain("[Humongous Total");
+    output.shouldContain("[Humongous Candidate");
+    output.shouldContain("[Humongous Reclaimed");
+
+    // As G1TraceReclaimDeadHumongousObjectsAtYoungGC is set and GCWithHumongousObjectTest has humongous objects,
+    // these logs should be displayed.
+    output.shouldContain("Live humongous");
+    output.shouldContain("Reclaim humongous region");
+    output.shouldHaveExitValue(0);
+  }
+
+  static class GCTest {
+    private static byte[] garbage;
+
+    public static void main(String [] args) {
+      System.out.println("Creating garbage");
+      // create 128MB of garbage. This should result in at least one GC
+      for (int i = 0; i < 1024; i++) {
+        garbage = new byte[128 * 1024];
+      }
+      System.out.println("Done");
+    }
+  }
+
+  static class GCWithHumongousObjectTest {
+
+    public static final int M = 1024*1024;
+    public static LinkedList<Object> garbageList = new LinkedList<Object>();
+    // A large object referenced by a static.
+    static int[] filler = new int[10 * M];
+
+    public static void genGarbage() {
+      for (int i = 0; i < 32*1024; i++) {
+        garbageList.add(new int[100]);
+      }
+      garbageList.clear();
+    }
+
+    public static void main(String[] args) {
+
+      int[] large = new int[M];
+      Object ref = large;
+
+      System.out.println("Creating garbage");
+      for (int i = 0; i < 100; i++) {
+        // A large object that will be reclaimed eagerly.
+        large = new int[6*M];
+        genGarbage();
+        // Make sure that the compiler cannot completely remove
+        // the allocation of the large object until here.
+        System.out.println(large);
+      }
+
+      // Keep the reference to the first object alive.
+      System.out.println(ref);
+      System.out.println("Done");
+    }
+  }
+}
--- a/test/gc/g1/TestGCLogMessages.java	Wed Mar 25 11:31:54 2015 -0700
+++ b/test/gc/g1/TestGCLogMessages.java	Wed Apr 01 11:31:42 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@

 /*
  * @test TestGCLogMessages
- * @bug 8035406 8027295 8035398 8019342 8027959
+ * @bug 8035406 8027295 8035398 8019342 8027959 8027962
  * @summary Ensure that the PrintGCDetails output for a minor GC with G1
  * includes the expected necessary messages.
  * @key gc
@@ -34,128 +34,158 @@
 import com.oracle.java.testlibrary.OutputAnalyzer;

 public class TestGCLogMessages {
-  public static void main(String[] args) throws Exception {
-    testNormalLogs();
-    testWithToSpaceExhaustionLogs();
-  }

-  private static void testNormalLogs() throws Exception {
-
-    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
-                                                              "-Xmx10M",
-                                                              GCTest.class.getName());
+    private enum Level {
+        OFF, FINER, FINEST;
+        public boolean lessOrEqualTo(Level other) {
+            return this.compareTo(other) < 0;
+        }
+    }

-    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+    private class LogMessageWithLevel {
+        String message;
+        Level level;

-    output.shouldNotContain("[Redirty Cards");
-    output.shouldNotContain("[Parallel Redirty");
-    output.shouldNotContain("[Redirtied Cards");
-    output.shouldNotContain("[Code Root Purge");
-    output.shouldNotContain("[String Dedup Fixup");
-    output.shouldNotContain("[Young Free CSet");
-    output.shouldNotContain("[Non-Young Free CSet");
-    output.shouldNotContain("[Humongous Reclaim");
-    output.shouldHaveExitValue(0);
+        public LogMessageWithLevel(String message, Level level) {
+            this.message = message;
+            this.level = level;
+        }
+    };

-    pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
-                                               "-XX:+UseStringDeduplication",
-                                               "-Xmx10M",
-                                               "-XX:+PrintGCDetails",
-                                               GCTest.class.getName());
-
-    output = new OutputAnalyzer(pb.start());
+    private LogMessageWithLevel allLogMessages[] = new LogMessageWithLevel[] {
+        // Ext Root Scan
+        new LogMessageWithLevel("Thread Roots (ms)", Level.FINEST),
+        new LogMessageWithLevel("StringTable Roots (ms)", Level.FINEST),
+        new LogMessageWithLevel("Universe Roots (ms)", Level.FINEST),
+        new LogMessageWithLevel("JNI Handles Roots (ms)", Level.FINEST),
+        new LogMessageWithLevel("ObjectSynchronizer Roots (ms)", Level.FINEST),
+        new LogMessageWithLevel("FlatProfiler Roots", Level.FINEST),
+        new LogMessageWithLevel("Management Roots", Level.FINEST),
+        new LogMessageWithLevel("SystemDictionary Roots", Level.FINEST),
+        new LogMessageWithLevel("CLDG Roots", Level.FINEST),
+        new LogMessageWithLevel("JVMTI Roots", Level.FINEST),
+        new LogMessageWithLevel("CodeCache Roots", Level.FINEST),
+        new LogMessageWithLevel("SATB Filtering", Level.FINEST),
+        new LogMessageWithLevel("CM RefProcessor Roots", Level.FINEST),
+        new LogMessageWithLevel("Wait For Strong CLD", Level.FINEST),
+        new LogMessageWithLevel("Weak CLD Roots", Level.FINEST),
+        // Redirty Cards
+        new LogMessageWithLevel("Redirty Cards", Level.FINER),
+        new LogMessageWithLevel("Parallel Redirty", Level.FINEST),
+        new LogMessageWithLevel("Redirtied Cards", Level.FINEST),
+        // Misc Top-level
+        new LogMessageWithLevel("Code Root Purge", Level.FINER),
+        new LogMessageWithLevel("String Dedup Fixup", Level.FINER),
+        // Free CSet
+        new LogMessageWithLevel("Young Free CSet", Level.FINEST),
+        new LogMessageWithLevel("Non-Young Free CSet", Level.FINEST),
+        // Humongous Eager Reclaim
+        new LogMessageWithLevel("Humongous Reclaim", Level.FINER),
+    };

-    output.shouldContain("[Redirty Cards");
-    output.shouldNotContain("[Parallel Redirty");
-    output.shouldNotContain("[Redirtied Cards");
-    output.shouldContain("[Code Root Purge");
-    output.shouldContain("[String Dedup Fixup");
-    output.shouldNotContain("[Young Free CSet");
-    output.shouldNotContain("[Non-Young Free CSet");
-    output.shouldContain("[Humongous Reclaim");
-    output.shouldNotContain("[Humongous Total");
-    output.shouldNotContain("[Humongous Candidate");
-    output.shouldNotContain("[Humongous Reclaimed");
-    output.shouldHaveExitValue(0);
+    void checkMessagesAtLevel(OutputAnalyzer output, LogMessageWithLevel messages[], Level level) throws Exception {
+        for (LogMessageWithLevel l : messages) {
+            if (level.lessOrEqualTo(l.level)) {
+                output.shouldNotContain(l.message);
+            } else {
+                output.shouldContain(l.message);
+            }
+        }
+    }

-    pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
-                                               "-XX:+UseStringDeduplication",
-                                               "-Xmx10M",
-                                               "-XX:+PrintGCDetails",
-                                               "-XX:+UnlockExperimentalVMOptions",
-                                               "-XX:G1LogLevel=finest",
-                                               GCTest.class.getName());
+    public static void main(String[] args) throws Exception {
+        new TestGCLogMessages().testNormalLogs();
+        new TestGCLogMessages().testWithToSpaceExhaustionLogs();
+    }
+
+    private void testNormalLogs() throws Exception {

-    output = new OutputAnalyzer(pb.start());
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                                                  "-Xmx10M",
+                                                                  GCTest.class.getName());
+
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        checkMessagesAtLevel(output, allLogMessages, Level.OFF);
+        output.shouldHaveExitValue(0);

-    output.shouldContain("[Redirty Cards");
-    output.shouldContain("[Parallel Redirty");
-    output.shouldContain("[Redirtied Cards");
-    output.shouldContain("[Code Root Purge");
-    output.shouldContain("[String Dedup Fixup");
-    output.shouldContain("[Young Free CSet");
-    output.shouldContain("[Non-Young Free CSet");
-    output.shouldContain("[Humongous Reclaim");
-    output.shouldContain("[Humongous Total");
-    output.shouldContain("[Humongous Candidate");
-    output.shouldContain("[Humongous Reclaimed");
-    output.shouldHaveExitValue(0);
-  }
+        pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                                   "-XX:+UseStringDeduplication",
+                                                   "-Xmx10M",
+                                                   "-XX:+PrintGCDetails",
+                                                   GCTest.class.getName());
+
+        output = new OutputAnalyzer(pb.start());
+        checkMessagesAtLevel(output, allLogMessages, Level.FINER);
+
+        pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                                   "-XX:+UseStringDeduplication",
+                                                   "-Xmx10M",
+                                                   "-XX:+PrintGCDetails",
+                                                   "-XX:+UnlockExperimentalVMOptions",
+                                                   "-XX:G1LogLevel=finest",
+                                                   GCTest.class.getName());

-  private static void testWithToSpaceExhaustionLogs() throws Exception {
-    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
-                                               "-Xmx10M",
-                                               "-Xmn5M",
-                                               "-XX:+PrintGCDetails",
-                                               GCTestWithToSpaceExhaustion.class.getName());
+        output = new OutputAnalyzer(pb.start());
+        checkMessagesAtLevel(output, allLogMessages, Level.FINEST);
+        output.shouldHaveExitValue(0);
+    }

-    OutputAnalyzer output = new OutputAnalyzer(pb.start());
-    output.shouldContain("[Evacuation Failure");
-    output.shouldNotContain("[Recalculate Used");
-    output.shouldNotContain("[Remove Self Forwards");
-    output.shouldNotContain("[Restore RemSet");
-    output.shouldHaveExitValue(0);
+    LogMessageWithLevel exhFailureMessages[] = new LogMessageWithLevel[] {
+        new LogMessageWithLevel("Evacuation Failure", Level.FINER),
+        new LogMessageWithLevel("Recalculate Used", Level.FINEST),
+        new LogMessageWithLevel("Remove Self Forwards", Level.FINEST),
+        new LogMessageWithLevel("Restore RemSet", Level.FINEST),
+    };

-    pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
-                                               "-Xmx10M",
-                                               "-Xmn5M",
-                                               "-XX:+PrintGCDetails",
-                                               "-XX:+UnlockExperimentalVMOptions",
-                                               "-XX:G1LogLevel=finest",
-                                               GCTestWithToSpaceExhaustion.class.getName());
+    private void testWithToSpaceExhaustionLogs() throws Exception {
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                                                  "-Xmx32M",
+                                                                  "-Xmn16M",
+                                                                  "-XX:+PrintGCDetails",
+                                                                  GCTestWithToSpaceExhaustion.class.getName());
+
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        checkMessagesAtLevel(output, exhFailureMessages, Level.FINER);
+        output.shouldHaveExitValue(0);

-    output = new OutputAnalyzer(pb.start());
-    output.shouldContain("[Evacuation Failure");
-    output.shouldContain("[Recalculate Used");
-    output.shouldContain("[Remove Self Forwards");
-    output.shouldContain("[Restore RemSet");
-    output.shouldHaveExitValue(0);
-  }
+        pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC",
+                                                   "-Xmx32M",
+                                                   "-Xmn16M",
+                                                   "-XX:+PrintGCDetails",
+                                                   "-XX:+UnlockExperimentalVMOptions",
+                                                   "-XX:G1LogLevel=finest",
+                                                   GCTestWithToSpaceExhaustion.class.getName());

-  static class GCTest {
-    private static byte[] garbage;
-    public static void main(String [] args) {
-      System.out.println("Creating garbage");
-      // create 128MB of garbage. This should result in at least one GC
-      for (int i = 0; i < 1024; i++) {
-        garbage = new byte[128 * 1024];
-      }
-      System.out.println("Done");
+        output = new OutputAnalyzer(pb.start());
+        checkMessagesAtLevel(output, exhFailureMessages, Level.FINEST);
+        output.shouldHaveExitValue(0);
     }
-  }
+
+    static class GCTest {
+        private static byte[] garbage;
+        public static void main(String [] args) {
+            System.out.println("Creating garbage");
+            // create 128MB of garbage. This should result in at least one GC
+            for (int i = 0; i < 1024; i++) {
+                garbage = new byte[128 * 1024];
+            }
+            System.out.println("Done");
+        }
+    }

-  static class GCTestWithToSpaceExhaustion {
-    private static byte[] garbage;
-    private static byte[] largeObject;
-    public static void main(String [] args) {
-      largeObject = new byte[5*1024*1024];
-      System.out.println("Creating garbage");
-      // create 128MB of garbage. This should result in at least one GC,
-      // some of them with to-space exhaustion.
-      for (int i = 0; i < 1024; i++) {
-        garbage = new byte[128 * 1024];
-      }
-      System.out.println("Done");
+    static class GCTestWithToSpaceExhaustion {
+        private static byte[] garbage;
+        private static byte[] largeObject;
+        public static void main(String [] args) {
+            largeObject = new byte[16*1024*1024];
+            System.out.println("Creating garbage");
+            // create 128MB of garbage. This should result in at least one GC,
+            // some of them with to-space exhaustion.
+            for (int i = 0; i < 1024; i++) {
+                garbage = new byte[128 * 1024];
+            }
+            System.out.println("Done");
+        }
     }
-  }
 }
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/handlerInTry/HandlerInTry.jasm	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * HandlerInTry contains a try block in a ctor whose handler is inside
+ * the same try block.  The try block starts at line 74 (try t2;), ends at
+ * line 106 (endtry t2;), but its handler starts at line 101 (catch t2 #0;).
+ */
+super public class HandlerInTry
+    version 51:0
+{
+
+public static final synthetic Field ___transactionFactory_2002349702336125:"Ljava/lang/Object;";
+
+public Method "<init>":"(Ljava/lang/Object;)V"
+    stack 5 locals 5
+{
+        invokestatic    Method ThreadLocalTransaction.getThreadLocalTransaction:"()Ljava/lang/Object;";
+        checkcast    class java/lang/Object;
+        astore_2;
+        aload_2;
+        invokestatic    Method TransactionLogicDonor.isActiveTransaction:"(Ljava/lang/Object;)Z";
+        ifeq    L21;
+        aload_0;
+        aload_1;
+        aload_2;
+        invokespecial    Method "<init>":"(Ljava/lang/Object;Ljava/lang/Object;)V";
+        return;
+    L21:    stack_frame_type append;
+        locals_map class java/lang/Object;
+        aload_2;
+        getstatic    Field ___transactionFactory_2002349702336125:"Ljava/lang/Object;";
+        invokestatic    Method TransactionLogicDonor.createTransaction:"(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;";
+        astore_2;
+        aload_2;
+        iconst_1;
+        pop;
+        aload_2;
+        invokestatic    Method ThreadLocalTransaction.setThreadLocalTransaction:"(Ljava/lang/Object;)V";
+        try t0, t1;
+        aload_0;
+        aload_1;
+        aload_2;
+        invokespecial    Method "<init>":"(Ljava/lang/Object;Ljava/lang/Object;)V";
+        aload_2;
+        pop;
+        aconst_null;
+        astore_2;
+        endtry t0, t1;
+        invokestatic    Method ThreadLocalTransaction.clearThreadLocalTransaction:"()V";
+        pop;
+        goto    L107;
+        catch t0 java/lang/Throwable;
+        try t2;
+        stack_frame_type full;
+        locals_map bogus, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore_3;
+        aload_2;
+        pop;
+        aload_3;
+        instanceof    class ControlFlowError;
+        ifeq    L82;
+        new    class java/lang/NullPointerException;
+        dup;
+        invokespecial    Method java/lang/NullPointerException."<init>":"()V";
+        athrow;
+    L82:    stack_frame_type append;
+        locals_map class java/lang/Throwable;
+        aload_3;
+        instanceof    class java/lang/Error;
+        ifeq    L94;
+        aload_3;
+        checkcast    class java/lang/Error;
+        athrow;
+    L94:    stack_frame_type same;
+        aload_3;
+        checkcast    class java/lang/Exception;
+        athrow;
+        catch t1 #0;
+        catch t2 #0;
+        stack_frame_type full;
+        locals_map bogus, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore    4;
+        endtry t2;
+        invokestatic    Method ThreadLocalTransaction.clearThreadLocalTransaction:"()V";
+        aload    4;
+        athrow;
+    L107:    stack_frame_type full;
+        locals_map class HandlerInTry, class java/lang/Object, null;
+        return;
+}
+
+} // end Class HandlerInTry
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/handlerInTry/IsolatedHandlerInTry.jasm	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * IsolatedHandlerInTry contains a try block in a ctor whose handler is inside
+ * the same try block but the handler can only be reached if an exception
+ * occurs.  The handler does a return.  So, a VerifyException should be thrown.
+ * The try block starts at line 77 (try t2;) and ends at line 113 (endtry t2;).
+ * Its handler starts at line 107 (catch t2 #0;).  The handler can only be reached
+ * by exception because of the athrow at line 106.
+ */
+super public class IsolatedHandlerInTry
+    version 51:0
+{
+
+public static final synthetic Field ___transactionFactory_2002349702336125:"Ljava/lang/Object;";
+
+public Method "<init>":"(Ljava/lang/Object;)V"
+    stack 5 locals 5
+{
+        invokestatic    Method ThreadLocalTransaction.getThreadLocalTransaction:"()Ljava/lang/Object;";
+        checkcast    class java/lang/Object;
+        astore_2;
+        aload_2;
+        invokestatic    Method TransactionLogicDonor.isActiveTransaction:"(Ljava/lang/Object;)Z";
+        ifeq    L21;
+        aload_0;
+        aload_1;
+        aload_2;
+        invokespecial    Method "<init>":"(Ljava/lang/Object;Ljava/lang/Object;)V";
+        return;
+    L21:    stack_frame_type append;
+        locals_map class java/lang/Object;
+        aload_2;
+        getstatic    Field ___transactionFactory_2002349702336125:"Ljava/lang/Object;";
+        invokestatic    Method TransactionLogicDonor.createTransaction:"(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;";
+        astore_2;
+        aload_2;
+        iconst_1;
+        pop;
+        aload_2;
+        invokestatic    Method ThreadLocalTransaction.setThreadLocalTransaction:"(Ljava/lang/Object;)V";
+        try t0, t1;
+        aload_0;
+        aload_1;
+        aload_2;
+        invokespecial    Method "<init>":"(Ljava/lang/Object;Ljava/lang/Object;)V";
+        aload_2;
+        pop;
+        aconst_null;
+        astore_2;
+        endtry t0, t1;
+        invokestatic    Method ThreadLocalTransaction.clearThreadLocalTransaction:"()V";
+        pop;
+        goto    L107;
+        catch t0 java/lang/Throwable;
+        try t2;
+        stack_frame_type full;
+        locals_map bogus, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore_3;
+        aload_2;
+        pop;
+        aload_3;
+        instanceof    class ControlFlowError;
+        ifeq    L82;
+        new    class java/lang/NullPointerException;
+        dup;
+        invokespecial    Method java/lang/NullPointerException."<init>":"()V";
+        athrow;
+    L82:    stack_frame_type append;
+        locals_map class java/lang/Throwable;
+        aload_3;
+        instanceof    class java/lang/Error;
+        ifeq    L94;
+        aload_3;
+        checkcast    class java/lang/Error;
+        athrow;
+    L94:    stack_frame_type same;
+        aload_3;
+        checkcast    class java/lang/Exception;
+        catch t1 #0;
+        stack_frame_type full;
+        locals_map bogus, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        athrow;
+        catch t2 #0;
+        stack_frame_type full;
+        locals_map bogus, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore    4;
+        return;
+        endtry t2;
+        stack_frame_type full;
+        locals_map bogus, class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        invokestatic    Method ThreadLocalTransaction.clearThreadLocalTransaction:"()V";
+        athrow;
+    L107:    stack_frame_type full;
+        locals_map class IsolatedHandlerInTry, class java/lang/Object, null;
+        return;
+}
+
+} // end Class IsolatedHandlerInTry
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/handlerInTry/LoadHandlerInTry.java	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8075118
+ * @summary Allow a ctor to call super() from a switch bytecode.
+ * @compile HandlerInTry.jasm
+ * @compile IsolatedHandlerInTry.jasm
+ * @run main/othervm -Xverify:all LoadHandlerInTry
+ */
+
+/*
+ * This test has two cases:
+ *
+ * 1. class HandlerInTry:  Class HandlerInTry contains a TRY block in a
+ *    constructor whose handler is inside the same TRY block.  The last
+ *    few bytecodes and exception table look like this:
+ *
+ *         ...
+ *      87: athrow
+ *      88: astore        4
+ *      90: invokestatic  #9
+ *      93: aload         4
+ *      95: athrow
+ *      96: return
+ *    Exception table:
+ *       from    to  target type
+ *          36    46    53   Class java/lang/Throwable
+ *          36    46    88   any
+ *          53    90    88   any
+ *
+ * Note that the target for the third handler in the Exception table is
+ * inside its TRY block.
+ * Without the fix for bug JDK-8075118, this test will time out.
+ *
+ *
+ * 2. class IsolatedHandlerInTry: Class IsolatedHandlerInTry also contains
+ *    a TRY block in a constructoer whose handler is inside its TRY block.
+ *    But the handler is only reachable if an exception is thrown.  The
+ *    handler's bytecodes will not get parsed as part of parsing the TRY
+ *    block.  They will only get parsed as a handler for the TRY block.
+ *    Since the isolated handler does a 'return', a VerifyError exception
+ *    should get thrown.
+ */
+
+public class LoadHandlerInTry {
+
+    public static void main(String[] args) throws Exception {
+        System.out.println("Regression test for bug 8075118");
+        try {
+            Class newClass = Class.forName("HandlerInTry");
+        } catch (Exception e) {
+            System.out.println("Failed: Exception was thrown: " + e.toString());
+            throw e;
+        }
+
+        try {
+            Class newClass = Class.forName("IsolatedHandlerInTry");
+            throw new RuntimeException(
+                 "Failed to throw VerifyError for IsolatedHandlerInTry");
+        } catch (java.lang.VerifyError e) {
+            System.out.println("Passed: VerifyError exception was thrown");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/BadMap.jasm	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,152 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * This class should throw VerifyError because the StackMap for bytecode index
+ * 45 (astore_2, line 123) is incorrect. The stack maps for bytecode indexes 45
+ * and 49 (astore, line 133) do not match because 45 does not supply enough
+ * locals to satisfy 49.
+ *
+ * The astore_2 bytecode at bytecode index 45 changes the type state,
+ * preventing the stackmap mismatch.  But, if the incoming type state is used,
+ * as required by JVM Spec 8, then the verifier will detected the stackmap
+ * mismatch, and throw VerifyError.
+ */
+
+super public class BadMap
+    version 51:0
+{
+
+
+public Method "<init>":"()V"
+    stack 1 locals 1
+{
+        aload_0;
+        invokespecial    Method java/lang/Object."<init>":"()V";
+        return;
+}
+
+public static Method main:"([Ljava/lang/String;)V"
+    throws java/lang/Throwable
+    stack 0 locals 1
+{
+        return;
+}
+
+public static Method foo:"()V"
+    stack 3 locals 5
+{
+        iconst_0;
+        ifne    L5;
+        nop;
+        try t7;
+    L5:    stack_frame_type full;
+        aconst_null;
+        dup;
+        astore_0;
+        astore_1;
+        try t0;
+        aconst_null;
+        astore_0;
+        endtry t0;
+        goto    L19;
+        catch t0 java/io/IOException;
+        stack_frame_type full;
+        locals_map class java/lang/Object, null;
+        stack_map class java/io/IOException;
+        astore_2;
+        aconst_null;
+        dup;
+        astore_1;
+        astore_0;
+        try t1;
+    L19:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object;
+        aconst_null;
+        astore_2;
+        endtry t1;
+        aload_1;
+        ifnonnull    L37;
+        nop;
+        goto    L37;
+        catch t1 #0;
+        catch t2 #0;
+        try t2;
+        stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore_3;
+        endtry t2;
+        aload_1;
+        ifnonnull    L35;
+        nop;
+    L35:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, bogus, class java/lang/Throwable;
+        aload_3;
+        athrow;
+        try t3, t4;
+    L37:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        aload_1;
+        ifnonnull    L42;
+        nop;
+        endtry t3, t4;
+    L42:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        goto    L54;
+        catch t3 java/lang/Exception;
+        try t5;
+        stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Exception;
+        astore_2;   // astore_2, at bci 45, that changes the type state.
+        endtry t5;
+        goto    L54;
+        catch t4 #0;
+        catch t5 #0;
+        catch t6 #0;
+        try t6;
+        stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        stack_map class java/lang/Throwable;
+        astore    4;
+        endtry t6;
+        aload    4;
+        athrow;
+    L54:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        goto    L57;
+    L57:    stack_frame_type full;
+        locals_map class java/lang/Object, class java/lang/Object, class java/lang/Object;
+        nop;
+        endtry t7;
+        return;
+        catch t7 #0;
+        stack_frame_type full;
+        stack_map class java/lang/Throwable;
+        nop;
+        athrow;
+}
+
+} // end Class BadMap
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/BadMapDstore.jasm	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,79 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * This class should throw VerifyError because the StackMap for bytecode index
+ * 9 (dstore_2, line 60) is incorrect. The stack maps for bytecode indexes 9
+ * and 18 (astore_2, line 70) do not match because 9 does not supply enough
+ * locals to satisfy 18.
+ *
+ * The dstore_2 bytecode at bytecode index 9 changes the type state,
+ * preventing the stackmap mismatch.  But, if the incoming type state is used,
+ * as required by JVM Spec 8, then the verifier will detected the stackmap
+ * mismatch, and throw VerifyError.
+ */
+
+super public class BadMapDstore
+    version 51:0
+{
+
+Field blah:I;
+
+public Method "<init>":"()V"
+    stack 1 locals 1
+{
+        aload_0;
+        invokespecial    Method java/lang/Object."<init>":"()V";
+        return;
+}
+
+public static Method main:"([Ljava/lang/String;)V"
+    stack 4 locals 4
+{
+        new    class BadMapDstore;
+        dup;
+        invokespecial    Method "<init>":"()V";
+        astore_1;
+        dconst_1;
+        try t0;
+        dstore_2;
+        aload_1;
+        iconst_5;
+        putfield    Field blah:"I";
+        endtry t0;
+        goto    L22;
+        catch t0 java/lang/Throwable;
+        stack_frame_type full;
+        locals_map class "[Ljava/lang/String;", class BadMapDstore, double;
+        stack_map class java/lang/Throwable;
+        astore_2;
+        aload_1;
+        dconst_0;
+        dstore_2;
+        pop;
+    L22:    stack_frame_type same;
+        return;
+}
+
+} // end Class BadMapDstore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/BadMapIstore.jasm	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,79 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * This class should throw VerifyError because the StackMap for bytecode index
+ * 9 (istore_2, line 60) is incorrect. The stack maps for bytecode indexes 9
+ * and 18 (astore_2, line 70) do not match because 9 does not supply enough
+ * locals to satisfy 18.
+ *
+ * The istore_2 bytecode at bytecode index 9 changes the type state,
+ * preventing the stackmap mismatch.  But, if the incoming type state is used,
+ * as required by JVM Spec 8, then the verifier will detected the stackmap
+ * mismatch, and throw VerifyError.
+ */
+
+super public class BadMapIstore
+    version 51:0
+{
+
+Field blah:I;
+
+public Method "<init>":"()V"
+    stack 1 locals 1
+{
+        aload_0;
+        invokespecial    Method java/lang/Object."<init>":"()V";
+        return;
+}
+
+public static Method main:"([Ljava/lang/String;)V"
+    stack 2 locals 3
+{
+        new    class BadMapIstore;
+        dup;
+        invokespecial    Method "<init>":"()V";
+        astore_1;
+        iconst_2;
+        try t0;
+        istore_2;
+        aload_1;
+        iconst_5;
+        putfield    Field blah:"I";
+        endtry t0;
+        goto    L22;
+        catch t0 java/lang/Throwable;
+        stack_frame_type full;
+        locals_map class "[Ljava/lang/String;", class BadMapIstore, int;
+        stack_map class java/lang/Throwable;
+        astore_2;
+        aload_1;
+        iconst_4;
+        istore_2;
+        pop;
+    L22:    stack_frame_type same;
+        return;
+}
+
+} // end Class BadMapIstore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/stackMapCheck/StackMapCheck.java	Wed Apr 01 11:31:42 2015 -0700
@@ -0,0 +1,63 @@
+ /*
+  * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+  *
+  * This code is free software; you can redistribute it and/or modify it
+  * under the terms of the GNU General Public License version 2 only, as
+  * published by the Free Software Foundation.
+  *
+  * This code is distributed in the hope that it will be useful, but WITHOUT
+  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+  * version 2 for more details (a copy is included in the LICENSE file that
+  * accompanied this code).
+  *
+  * You should have received a copy of the GNU General Public License version
+  * 2 along with this work; if not, write to the Free Software Foundation,
+  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+  *
+  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+  * or visit www.oracle.com if you need additional information or have any
+  * questions.
+  *
+  */
+
+/*
+ * @test
+ * @bug 7127066
+ * @summary Class verifier accepts an invalid class file
+ * @compile BadMap.jasm
+ * @compile BadMapDstore.jasm
+ * @compile BadMapIstore.jasm
+ * @run main/othervm -Xverify:all StackMapCheck
+ */
+
+public class StackMapCheck {
+    public static void main(String args[]) throws Throwable {
+
+        System.out.println("Regression test for bug 7127066");
+        try {
+            Class newClass = Class.forName("BadMap");
+            throw new RuntimeException(
+                "StackMapCheck failed, BadMap did not throw VerifyError");
+        } catch (java.lang.VerifyError e) {
+            System.out.println("BadMap passed, VerifyError was thrown");
+        }
+
+        try {
+            Class newClass = Class.forName("BadMapDstore");
+            throw new RuntimeException(
+                "StackMapCheck failed, BadMapDstore did not throw VerifyError");
+        } catch (java.lang.VerifyError e) {
+            System.out.println("BadMapDstore passed, VerifyError was thrown");
+        }
+
+        try {
+            Class newClass = Class.forName("BadMapIstore");
+            throw new RuntimeException(
+                "StackMapCheck failed, BadMapIstore did not throw VerifyError");
+        } catch (java.lang.VerifyError e) {
+            System.out.println("BadMapIstore passed, VerifyError was thrown");
+        }
+    }
+}