changeset 2324:0ac769a57c64

6627983: G1: Bad oop deference during marking Summary: Bulk zeroing reduction didn't work with G1, because arraycopy would call pre-barriers on uninitialized oops. The solution is to have version of arraycopy stubs that don't have pre-barriers. Also refactored arraycopy stubs generation on SPARC to be more readable and reduced the number of stubs necessary in some cases. Reviewed-by: jrose, kvn, never
author iveresov
date Tue, 01 Mar 2011 14:56:48 -0800
parents bc6b27fb3568
children 8c9c9ee30d71
files src/cpu/sparc/vm/stubGenerator_sparc.cpp src/cpu/x86/vm/stubGenerator_x86_32.cpp src/cpu/x86/vm/stubGenerator_x86_64.cpp src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp src/share/vm/memory/barrierSet.cpp src/share/vm/memory/barrierSet.hpp src/share/vm/opto/library_call.cpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/stubRoutines.cpp src/share/vm/runtime/stubRoutines.hpp
diffstat 10 files changed, 356 insertions(+), 208 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Mar 01 14:56:48 2011 -0800
@@ -1033,31 +1033,40 @@
   //
   //  The input registers are overwritten.
   //
-  void gen_write_ref_array_pre_barrier(Register addr, Register count) {
+  void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
     BarrierSet* bs = Universe::heap()->barrier_set();
-    if (bs->has_write_ref_pre_barrier()) {
-      assert(bs->has_write_ref_array_pre_opt(),
-             "Else unsupported barrier set.");
-
-      __ save_frame(0);
-      // Save the necessary global regs... will be used after.
-      if (addr->is_global()) {
-        __ mov(addr, L0);
-      }
-      if (count->is_global()) {
-        __ mov(count, L1);
-      }
-      __ mov(addr->after_save(), O0);
-      // Get the count into O1
-      __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
-      __ delayed()->mov(count->after_save(), O1);
-      if (addr->is_global()) {
-        __ mov(L0, addr);
-      }
-      if (count->is_global()) {
-        __ mov(L1, count);
-      }
-      __ restore();
+    switch (bs->kind()) {
+      case BarrierSet::G1SATBCT:
+      case BarrierSet::G1SATBCTLogging:
+        // With G1, don't generate the call if we statically know that the target in uninitialized
+        if (!dest_uninitialized) {
+          __ save_frame(0);
+          // Save the necessary global regs... will be used after.
+          if (addr->is_global()) {
+            __ mov(addr, L0);
+          }
+          if (count->is_global()) {
+            __ mov(count, L1);
+          }
+          __ mov(addr->after_save(), O0);
+          // Get the count into O1
+          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
+          __ delayed()->mov(count->after_save(), O1);
+          if (addr->is_global()) {
+            __ mov(L0, addr);
+          }
+          if (count->is_global()) {
+            __ mov(L1, count);
+          }
+          __ restore();
+        }
+        break;
+      case BarrierSet::CardTableModRef:
+      case BarrierSet::CardTableExtension:
+      case BarrierSet::ModRef:
+        break;
+      default:
+        ShouldNotReachHere();
     }
   }
   //
@@ -1071,7 +1080,7 @@
   //  The input registers are overwritten.
   //
   void gen_write_ref_array_post_barrier(Register addr, Register count,
-                                   Register tmp) {
+                                        Register tmp) {
     BarrierSet* bs = Universe::heap()->barrier_set();
 
     switch (bs->kind()) {
@@ -2406,7 +2415,7 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
 
-    assert(!aligned, "usage");
+    assert(aligned, "Should always be aligned");
 
     assert_clean_int(O2, O3);     // Make sure 'count' is clean int.
 
@@ -2435,7 +2444,8 @@
   //      to:    O1
   //      count: O2 treated as signed
   //
-  address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name) {
+  address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name,
+                                     bool dest_uninitialized = false) {
 
     const Register from  = O0;  // source array address
     const Register to    = O1;  // destination array address
@@ -2456,7 +2466,7 @@
     // save arguments for barrier generation
     __ mov(to, G1);
     __ mov(count, G5);
-    gen_write_ref_array_pre_barrier(G1, G5);
+    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
   #ifdef _LP64
     assert_clean_int(count, O3);     // Make sure 'count' is clean int.
     if (UseCompressedOops) {
@@ -2486,7 +2496,8 @@
   //      count: O2 treated as signed
   //
   address generate_conjoint_oop_copy(bool aligned, address nooverlap_target,
-                                     address *entry, const char *name) {
+                                     address *entry, const char *name,
+                                     bool dest_uninitialized = false) {
 
     const Register from  = O0;  // source array address
     const Register to    = O1;  // destination array address
@@ -2509,7 +2520,7 @@
     // save arguments for barrier generation
     __ mov(to, G1);
     __ mov(count, G5);
-    gen_write_ref_array_pre_barrier(G1, G5);
+    gen_write_ref_array_pre_barrier(G1, G5, dest_uninitialized);
 
   #ifdef _LP64
     if (UseCompressedOops) {
@@ -2578,7 +2589,7 @@
   //      ckval: O4 (super_klass)
   //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
   //
-  address generate_checkcast_copy(const char *name, address *entry) {
+  address generate_checkcast_copy(const char *name, address *entry, bool dest_uninitialized = false) {
 
     const Register O0_from   = O0;      // source array address
     const Register O1_to     = O1;      // destination array address
@@ -2624,8 +2635,7 @@
       // caller can pass a 64-bit byte count here (from generic stub)
       BLOCK_COMMENT("Entry:");
     }
-
-    gen_write_ref_array_pre_barrier(O1_to, O2_count);
+    gen_write_ref_array_pre_barrier(O1_to, O2_count, dest_uninitialized);
 
     Label load_element, store_element, do_card_marks, fail, done;
     __ addcc(O2_count, 0, G1_remain);   // initialize loop index, and test it
@@ -3083,56 +3093,104 @@
     address entry_jlong_arraycopy;
     address entry_checkcast_arraycopy;
 
-    StubRoutines::_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(false, &entry,
-                                                                           "jbyte_disjoint_arraycopy");
-    StubRoutines::_jbyte_arraycopy           = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
-                                                                           "jbyte_arraycopy");
-    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
-                                                                            "jshort_disjoint_arraycopy");
-    StubRoutines::_jshort_arraycopy          = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
-                                                                            "jshort_arraycopy");
-    StubRoutines::_jint_disjoint_arraycopy   = generate_disjoint_int_copy(false, &entry,
-                                                                          "jint_disjoint_arraycopy");
-    StubRoutines::_jint_arraycopy            = generate_conjoint_int_copy(false, entry, &entry_jint_arraycopy,
-                                                                          "jint_arraycopy");
-    StubRoutines::_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(false, &entry,
-                                                                           "jlong_disjoint_arraycopy");
-    StubRoutines::_jlong_arraycopy           = generate_conjoint_long_copy(false, entry, &entry_jlong_arraycopy,
-                                                                           "jlong_arraycopy");
-    StubRoutines::_oop_disjoint_arraycopy    = generate_disjoint_oop_copy(false, &entry,
-                                                                          "oop_disjoint_arraycopy");
-    StubRoutines::_oop_arraycopy             = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
-                                                                          "oop_arraycopy");
-
-
-    StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = generate_disjoint_byte_copy(true, &entry,
-                                                                                   "arrayof_jbyte_disjoint_arraycopy");
-    StubRoutines::_arrayof_jbyte_arraycopy           = generate_conjoint_byte_copy(true, entry, NULL,
-                                                                                   "arrayof_jbyte_arraycopy");
-
+    //*** jbyte
+    // Always need aligned and unaligned versions
+    StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
+                                                                                  "jbyte_disjoint_arraycopy");
+    StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
+                                                                                  &entry_jbyte_arraycopy,
+                                                                                  "jbyte_arraycopy");
+    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
+                                                                                  "arrayof_jbyte_disjoint_arraycopy");
+    StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
+                                                                                  "arrayof_jbyte_arraycopy");
+
+    //*** jshort
+    // Always need aligned and unaligned versions
+    StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
+                                                                                    "jshort_disjoint_arraycopy");
+    StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
+                                                                                    &entry_jshort_arraycopy,
+                                                                                    "jshort_arraycopy");
     StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
                                                                                     "arrayof_jshort_disjoint_arraycopy");
     StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
                                                                                     "arrayof_jshort_arraycopy");
 
-    StubRoutines::_arrayof_jint_disjoint_arraycopy   = generate_disjoint_int_copy(true, &entry,
-                                                                                  "arrayof_jint_disjoint_arraycopy");
+    //*** jint
+    // Aligned versions
+    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
+                                                                                "arrayof_jint_disjoint_arraycopy");
+    StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
+                                                                                "arrayof_jint_arraycopy");
 #ifdef _LP64
-    // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
-    StubRoutines::_arrayof_jint_arraycopy     = generate_conjoint_int_copy(true, entry, NULL, "arrayof_jint_arraycopy");
-  #else
-    StubRoutines::_arrayof_jint_arraycopy     = StubRoutines::_jint_arraycopy;
+    // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
+    // entry_jint_arraycopy always points to the unaligned version (notice that we overwrite it).
+    StubRoutines::_jint_disjoint_arraycopy         = generate_disjoint_int_copy(false, &entry,
+                                                                                "jint_disjoint_arraycopy");
+    StubRoutines::_jint_arraycopy                  = generate_conjoint_int_copy(false, entry,
+                                                                                &entry_jint_arraycopy,
+                                                                                "jint_arraycopy");
+#else
+    // In 32 bit jints are always HeapWordSize aligned, so always use the aligned version
+    // (in fact in 32bit we always have a pre-loop part even in the aligned version,
+    //  because it uses 64-bit loads/stores, so the aligned flag is actually ignored).
+    StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy;
+    StubRoutines::_jint_arraycopy          = StubRoutines::_arrayof_jint_arraycopy;
 #endif
 
-    StubRoutines::_arrayof_jlong_disjoint_arraycopy  = generate_disjoint_long_copy(true, NULL,
-                                                                                   "arrayof_jlong_disjoint_arraycopy");
-    StubRoutines::_arrayof_oop_disjoint_arraycopy    =  generate_disjoint_oop_copy(true, NULL,
-                                                                                   "arrayof_oop_disjoint_arraycopy");
-
-    StubRoutines::_arrayof_jlong_arraycopy    = StubRoutines::_jlong_arraycopy;
-    StubRoutines::_arrayof_oop_arraycopy      = StubRoutines::_oop_arraycopy;
-
-    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+
+    //*** jlong
+    // It is always aligned
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
+                                                                                  "arrayof_jlong_disjoint_arraycopy");
+    StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
+                                                                                  "arrayof_jlong_arraycopy");
+    StubRoutines::_jlong_disjoint_arraycopy         = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+    StubRoutines::_jlong_arraycopy                  = StubRoutines::_arrayof_jlong_arraycopy;
+
+
+    //*** oops
+    // Aligned versions
+    StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(true, &entry,
+                                                                                      "arrayof_oop_disjoint_arraycopy");
+    StubRoutines::_arrayof_oop_arraycopy                 = generate_conjoint_oop_copy(true, entry, &entry_oop_arraycopy,
+                                                                                      "arrayof_oop_arraycopy");
+    // Aligned versions without pre-barriers
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, &entry,
+                                                                                      "arrayof_oop_disjoint_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
+    StubRoutines::_arrayof_oop_arraycopy_uninit          = generate_conjoint_oop_copy(true, entry, NULL,
+                                                                                      "arrayof_oop_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
+#ifdef _LP64
+    if (UseCompressedOops) {
+      // With compressed oops we need unaligned versions, notice that we overwrite entry_oop_arraycopy.
+      StubRoutines::_oop_disjoint_arraycopy            = generate_disjoint_oop_copy(false, &entry,
+                                                                                    "oop_disjoint_arraycopy");
+      StubRoutines::_oop_arraycopy                     = generate_conjoint_oop_copy(false, entry, &entry_oop_arraycopy,
+                                                                                    "oop_arraycopy");
+      // Unaligned versions without pre-barriers
+      StubRoutines::_oop_disjoint_arraycopy_uninit     = generate_disjoint_oop_copy(false, &entry,
+                                                                                    "oop_disjoint_arraycopy_uninit",
+                                                                                    /*dest_uninitialized*/true);
+      StubRoutines::_oop_arraycopy_uninit              = generate_conjoint_oop_copy(false, entry, NULL,
+                                                                                    "oop_arraycopy_uninit",
+                                                                                    /*dest_uninitialized*/true);
+    } else
+#endif
+    {
+      // oop arraycopy is always aligned on 32bit and 64bit without compressed oops
+      StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy;
+      StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
+      StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+      StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
+    }
+
+    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+                                                                        /*dest_uninitialized*/true);
+
     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
                                                               entry_jbyte_arraycopy,
                                                               entry_jshort_arraycopy,
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Tue Mar 01 14:56:48 2011 -0800
@@ -729,18 +729,19 @@
   //  Input:
   //     start   -  starting address
   //     count   -  element count
-  void  gen_write_ref_array_pre_barrier(Register start, Register count) {
+  void  gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) {
     assert_different_registers(start, count);
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
-        {
-          __ pusha();                      // push registers
-          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
-                          start, count);
-          __ popa();
-        }
+        // With G1, don't generate the call if we statically know that the target in uninitialized
+        if (!uninitialized_target) {
+           __ pusha();                      // push registers
+           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre),
+                           start, count);
+           __ popa();
+         }
         break;
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
@@ -919,7 +920,8 @@
 
   address generate_disjoint_copy(BasicType t, bool aligned,
                                  Address::ScaleFactor sf,
-                                 address* entry, const char *name) {
+                                 address* entry, const char *name,
+                                 bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -950,7 +952,7 @@
     if (t == T_OBJECT) {
       __ testl(count, count);
       __ jcc(Assembler::zero, L_0_count);
-      gen_write_ref_array_pre_barrier(to, count);
+      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
       __ mov(saved_to, to);          // save 'to'
     }
 
@@ -1084,7 +1086,8 @@
   address generate_conjoint_copy(BasicType t, bool aligned,
                                  Address::ScaleFactor sf,
                                  address nooverlap_target,
-                                 address* entry, const char *name) {
+                                 address* entry, const char *name,
+                                 bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1128,7 +1131,7 @@
     if (t == T_OBJECT) {
       __ testl(count, count);
       __ jcc(Assembler::zero, L_0_count);
-       gen_write_ref_array_pre_barrier(dst, count);
+      gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized);
     }
 
     // copy from high to low
@@ -1415,7 +1418,7 @@
   //    rax, ==  0  -  success
   //    rax, == -1^K - failure, where K is partial transfer count
   //
-  address generate_checkcast_copy(const char *name, address* entry) {
+  address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1476,7 +1479,7 @@
     Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
 
     // Copy from low to high addresses, indexed from the end of each array.
-    gen_write_ref_array_pre_barrier(to, count);
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
     __ lea(end_from, end_from_addr);
     __ lea(end_to,   end_to_addr);
     assert(length == count, "");        // else fix next line:
@@ -2039,6 +2042,15 @@
         generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
                                &entry_oop_arraycopy, "oop_arraycopy");
 
+    StubRoutines::_oop_disjoint_arraycopy_uninit =
+        generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry,
+                               "oop_disjoint_arraycopy_uninit",
+                               /*dest_uninitialized*/true);
+    StubRoutines::_oop_arraycopy_uninit =
+        generate_conjoint_copy(T_OBJECT, true, Address::times_ptr,  entry,
+                               NULL, "oop_arraycopy_uninit",
+                               /*dest_uninitialized*/true);
+
     StubRoutines::_jlong_disjoint_arraycopy =
         generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy");
     StubRoutines::_jlong_arraycopy =
@@ -2052,20 +2064,20 @@
     StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
 
-    StubRoutines::_arrayof_jint_disjoint_arraycopy  =
-        StubRoutines::_jint_disjoint_arraycopy;
-    StubRoutines::_arrayof_oop_disjoint_arraycopy   =
-        StubRoutines::_oop_disjoint_arraycopy;
-    StubRoutines::_arrayof_jlong_disjoint_arraycopy =
-        StubRoutines::_jlong_disjoint_arraycopy;
+    StubRoutines::_arrayof_jint_disjoint_arraycopy       = StubRoutines::_jint_disjoint_arraycopy;
+    StubRoutines::_arrayof_oop_disjoint_arraycopy        = StubRoutines::_oop_disjoint_arraycopy;
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = StubRoutines::_jlong_disjoint_arraycopy;
 
-    StubRoutines::_arrayof_jint_arraycopy  = StubRoutines::_jint_arraycopy;
-    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;
-    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
+    StubRoutines::_arrayof_jint_arraycopy       = StubRoutines::_jint_arraycopy;
+    StubRoutines::_arrayof_oop_arraycopy        = StubRoutines::_oop_arraycopy;
+    StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
+    StubRoutines::_arrayof_jlong_arraycopy      = StubRoutines::_jlong_arraycopy;
 
     StubRoutines::_checkcast_arraycopy =
-        generate_checkcast_copy("checkcast_arraycopy",
-                                  &entry_checkcast_arraycopy);
+        generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit =
+        generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true);
 
     StubRoutines::_unsafe_arraycopy =
         generate_unsafe_copy("unsafe_arraycopy",
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Mar 01 14:56:48 2011 -0800
@@ -1159,34 +1159,35 @@
   // Generate code for an array write pre barrier
   //
   //     addr    -  starting address
-  //     count    -  element count
+  //     count   -  element count
+  //     tmp     - scratch register
   //
   //     Destroy no registers!
   //
-  void  gen_write_ref_array_pre_barrier(Register addr, Register count) {
+  void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
     BarrierSet* bs = Universe::heap()->barrier_set();
     switch (bs->kind()) {
       case BarrierSet::G1SATBCT:
       case BarrierSet::G1SATBCTLogging:
-        {
-          __ pusha();                      // push registers
-          if (count == c_rarg0) {
-            if (addr == c_rarg1) {
-              // exactly backwards!!
-              __ xchgptr(c_rarg1, c_rarg0);
-            } else {
-              __ movptr(c_rarg1, count);
-              __ movptr(c_rarg0, addr);
-            }
-
-          } else {
-            __ movptr(c_rarg0, addr);
-            __ movptr(c_rarg1, count);
-          }
-          __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
-          __ popa();
+        // With G1, don't generate the call if we statically know that the target in uninitialized
+        if (!dest_uninitialized) {
+           __ pusha();                      // push registers
+           if (count == c_rarg0) {
+             if (addr == c_rarg1) {
+               // exactly backwards!!
+               __ xchgptr(c_rarg1, c_rarg0);
+             } else {
+               __ movptr(c_rarg1, count);
+               __ movptr(c_rarg0, addr);
+             }
+           } else {
+             __ movptr(c_rarg0, addr);
+             __ movptr(c_rarg1, count);
+           }
+           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
+           __ popa();
         }
-        break;
+         break;
       case BarrierSet::CardTableModRef:
       case BarrierSet::CardTableExtension:
       case BarrierSet::ModRef:
@@ -1769,7 +1770,8 @@
   //   disjoint_int_copy_entry is set to the no-overlap entry point
   //   used by generate_conjoint_int_oop_copy().
   //
-  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name) {
+  address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
+                                         const char *name, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1799,7 +1801,7 @@
                       // r9 and r10 may be used to save non-volatile registers
     if (is_oop) {
       __ movq(saved_to, to);
-      gen_write_ref_array_pre_barrier(to, count);
+      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
     }
 
     // 'from', 'to' and 'count' are now valid
@@ -1860,7 +1862,8 @@
   // cache line boundaries will still be loaded and stored atomicly.
   //
   address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
-                                         address *entry, const char *name) {
+                                         address *entry, const char *name,
+                                         bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1887,7 +1890,7 @@
 
     if (is_oop) {
       // no registers are destroyed by this call
-      gen_write_ref_array_pre_barrier(to, count);
+      gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
     }
 
     assert_clean_int(count, rax); // Make sure 'count' is clean int.
@@ -1953,7 +1956,8 @@
   //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
   //
-  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry, const char *name) {
+  address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
+                                          const char *name, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -1983,7 +1987,7 @@
     // 'from', 'to' and 'qword_count' are now valid
     if (is_oop) {
       // no registers are destroyed by this call
-      gen_write_ref_array_pre_barrier(to, qword_count);
+      gen_write_ref_array_pre_barrier(to, qword_count, dest_uninitialized);
     }
 
     // Copy from low to high addresses.  Use 'to' as scratch.
@@ -2038,8 +2042,9 @@
   //   c_rarg1   - destination array address
   //   c_rarg2   - element count, treated as ssize_t, can be zero
   //
-  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
-                                          address *entry, const char *name) {
+  address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
+                                          address nooverlap_target, address *entry,
+                                          const char *name, bool dest_uninitialized = false) {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
@@ -2067,7 +2072,7 @@
       // Save to and count for store barrier
       __ movptr(saved_count, qword_count);
       // No registers are destroyed by this call
-      gen_write_ref_array_pre_barrier(to, saved_count);
+      gen_write_ref_array_pre_barrier(to, saved_count, dest_uninitialized);
     }
 
     __ jmp(L_copy_32_bytes);
@@ -2146,7 +2151,8 @@
   //    rax ==  0  -  success
   //    rax == -1^K - failure, where K is partial transfer count
   //
-  address generate_checkcast_copy(const char *name, address *entry) {
+  address generate_checkcast_copy(const char *name, address *entry,
+                                  bool dest_uninitialized = false) {
 
     Label L_load_element, L_store_element, L_do_card_marks, L_done;
 
@@ -2240,7 +2246,7 @@
     Address from_element_addr(end_from, count, TIMES_OOP, 0);
     Address   to_element_addr(end_to,   count, TIMES_OOP, 0);
 
-    gen_write_ref_array_pre_barrier(to, count);
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
 
     // Copy from low to high addresses, indexed from the end of each array.
     __ lea(end_from, end_from_addr);
@@ -2750,14 +2756,29 @@
                                                                               "oop_disjoint_arraycopy");
       StubRoutines::_oop_arraycopy           = generate_conjoint_int_oop_copy(false, true, entry,
                                                                               &entry_oop_arraycopy, "oop_arraycopy");
+      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_int_oop_copy(false, true, &entry,
+                                                                                     "oop_disjoint_arraycopy_uninit",
+                                                                                     /*dest_uninitialized*/true);
+      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_int_oop_copy(false, true, entry,
+                                                                                     NULL, "oop_arraycopy_uninit",
+                                                                                     /*dest_uninitialized*/true);
     } else {
       StubRoutines::_oop_disjoint_arraycopy  = generate_disjoint_long_oop_copy(false, true, &entry,
                                                                                "oop_disjoint_arraycopy");
       StubRoutines::_oop_arraycopy           = generate_conjoint_long_oop_copy(false, true, entry,
                                                                                &entry_oop_arraycopy, "oop_arraycopy");
+      StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_long_oop_copy(false, true, &entry,
+                                                                                      "oop_disjoint_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
+      StubRoutines::_oop_arraycopy_uninit           = generate_conjoint_long_oop_copy(false, true, entry,
+                                                                                      NULL, "oop_arraycopy_uninit",
+                                                                                      /*dest_uninitialized*/true);
     }
 
-    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+                                                                        /*dest_uninitialized*/true);
+
     StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
                                                               entry_jbyte_arraycopy,
                                                               entry_jshort_arraycopy,
@@ -2794,6 +2815,9 @@
 
     StubRoutines::_arrayof_oop_disjoint_arraycopy    = StubRoutines::_oop_disjoint_arraycopy;
     StubRoutines::_arrayof_oop_arraycopy             = StubRoutines::_oop_arraycopy;
+
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit    = StubRoutines::_oop_disjoint_arraycopy_uninit;
+    StubRoutines::_arrayof_oop_arraycopy_uninit             = StubRoutines::_oop_arraycopy_uninit;
   }
 
   void generate_math_stubs() {
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Tue Mar 01 14:56:48 2011 -0800
@@ -83,11 +83,15 @@
   }
 
   template <class T> void write_ref_array_pre_work(T* dst, int count);
-  virtual void write_ref_array_pre(oop* dst, int count) {
-    write_ref_array_pre_work(dst, count);
+  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
+    if (!dest_uninitialized) {
+      write_ref_array_pre_work(dst, count);
+    }
   }
-  virtual void write_ref_array_pre(narrowOop* dst, int count) {
-    write_ref_array_pre_work(dst, count);
+  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
+    if (!dest_uninitialized) {
+      write_ref_array_pre_work(dst, count);
+    }
   }
 };
 
--- a/src/share/vm/memory/barrierSet.cpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/share/vm/memory/barrierSet.cpp	Tue Mar 01 14:56:48 2011 -0800
@@ -35,9 +35,9 @@
                    start,            count);
 #endif
   if (UseCompressedOops) {
-    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count);
+    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count, false);
   } else {
-    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count);
+    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count, false);
   }
 }
 
--- a/src/share/vm/memory/barrierSet.hpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/share/vm/memory/barrierSet.hpp	Tue Mar 01 14:56:48 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,6 +44,10 @@
     Uninit
   };
 
+  enum Flags {
+    None                = 0,
+    TargetUninitialized = 1
+  };
 protected:
   int _max_covered_regions;
   Name _kind;
@@ -128,8 +132,10 @@
   virtual void read_prim_array(MemRegion mr) = 0;
 
   // Below length is the # array elements being written
-  virtual void write_ref_array_pre(      oop* dst, int length) {}
-  virtual void write_ref_array_pre(narrowOop* dst, int length) {}
+  virtual void write_ref_array_pre(oop* dst, int length,
+                                   bool dest_uninitialized = false) {}
+  virtual void write_ref_array_pre(narrowOop* dst, int length,
+                                   bool dest_uninitialized = false) {}
   // Below count is the # array elements being written, starting
   // at the address "start", which may not necessarily be HeapWord-aligned
   inline void write_ref_array(HeapWord* start, size_t count);
--- a/src/share/vm/opto/library_call.cpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/share/vm/opto/library_call.cpp	Tue Mar 01 14:56:48 2011 -0800
@@ -97,7 +97,7 @@
                              RegionNode* region);
   Node* generate_current_thread(Node* &tls_output);
   address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
-                              bool disjoint_bases, const char* &name);
+                              bool disjoint_bases, const char* &name, bool dest_uninitialized);
   Node* load_mirror_from_klass(Node* klass);
   Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
                                       int nargs,
@@ -212,26 +212,26 @@
                                 AllocateNode* alloc,
                                 Node* src,  Node* src_offset,
                                 Node* dest, Node* dest_offset,
-                                Node* dest_size);
+                                Node* dest_size, bool dest_uninitialized);
   void generate_slow_arraycopy(const TypePtr* adr_type,
                                Node* src,  Node* src_offset,
                                Node* dest, Node* dest_offset,
-                               Node* copy_length);
+                               Node* copy_length, bool dest_uninitialized);
   Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
                                      Node* dest_elem_klass,
                                      Node* src,  Node* src_offset,
                                      Node* dest, Node* dest_offset,
-                                     Node* copy_length);
+                                     Node* copy_length, bool dest_uninitialized);
   Node* generate_generic_arraycopy(const TypePtr* adr_type,
                                    Node* src,  Node* src_offset,
                                    Node* dest, Node* dest_offset,
-                                   Node* copy_length);
+                                   Node* copy_length, bool dest_uninitialized);
   void generate_unchecked_arraycopy(const TypePtr* adr_type,
                                     BasicType basic_elem_type,
                                     bool disjoint_bases,
                                     Node* src,  Node* src_offset,
                                     Node* dest, Node* dest_offset,
-                                    Node* copy_length);
+                                    Node* copy_length, bool dest_uninitialized);
   bool inline_unsafe_CAS(BasicType type);
   bool inline_unsafe_ordered_store(BasicType type);
   bool inline_fp_conversions(vmIntrinsics::ID id);
@@ -4092,7 +4092,8 @@
   const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
   bool disjoint_bases = true;
   generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
-                               src, NULL, dest, NULL, countx);
+                               src, NULL, dest, NULL, countx,
+                               /*dest_uninitialized*/true);
 
   // If necessary, emit some card marks afterwards.  (Non-arrays only.)
   if (card_mark) {
@@ -4306,7 +4307,7 @@
 // Note:  The condition "disjoint" applies also for overlapping copies
 // where an descending copy is permitted (i.e., dest_offset <= src_offset).
 static address
-select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name) {
+select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name, bool dest_uninitialized) {
   int selector =
     (aligned  ? COPYFUNC_ALIGNED  : COPYFUNC_UNALIGNED) +
     (disjoint ? COPYFUNC_DISJOINT : COPYFUNC_CONJOINT);
@@ -4315,6 +4316,10 @@
   name = #xxx_arraycopy; \
   return StubRoutines::xxx_arraycopy(); }
 
+#define RETURN_STUB_PARM(xxx_arraycopy, parm) {           \
+  name = #xxx_arraycopy; \
+  return StubRoutines::xxx_arraycopy(parm); }
+
   switch (t) {
   case T_BYTE:
   case T_BOOLEAN:
@@ -4351,10 +4356,10 @@
   case T_ARRAY:
   case T_OBJECT:
     switch (selector) {
-    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_arraycopy);
-    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_arraycopy);
-    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_disjoint_arraycopy);
-    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_disjoint_arraycopy);
+    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB_PARM(oop_arraycopy, dest_uninitialized);
+    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB_PARM(arrayof_oop_arraycopy, dest_uninitialized);
+    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB_PARM(oop_disjoint_arraycopy, dest_uninitialized);
+    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB_PARM(arrayof_oop_disjoint_arraycopy, dest_uninitialized);
     }
   default:
     ShouldNotReachHere();
@@ -4362,6 +4367,7 @@
   }
 
 #undef RETURN_STUB
+#undef RETURN_STUB_PARM
 }
 
 //------------------------------basictype2arraycopy----------------------------
@@ -4369,7 +4375,8 @@
                                             Node* src_offset,
                                             Node* dest_offset,
                                             bool disjoint_bases,
-                                            const char* &name) {
+                                            const char* &name,
+                                            bool dest_uninitialized) {
   const TypeInt* src_offset_inttype  = gvn().find_int_type(src_offset);;
   const TypeInt* dest_offset_inttype = gvn().find_int_type(dest_offset);;
 
@@ -4395,7 +4402,7 @@
     disjoint = true;
   }
 
-  return select_arraycopy_function(t, aligned, disjoint, name);
+  return select_arraycopy_function(t, aligned, disjoint, name, dest_uninitialized);
 }
 
 
@@ -4451,7 +4458,8 @@
     // The component types are not the same or are not recognized.  Punt.
     // (But, avoid the native method wrapper to JVM_ArrayCopy.)
     generate_slow_arraycopy(TypePtr::BOTTOM,
-                            src, src_offset, dest, dest_offset, length);
+                            src, src_offset, dest, dest_offset, length,
+                            /*dest_uninitialized*/false);
     return true;
   }
 
@@ -4564,7 +4572,7 @@
 
   Node* original_dest      = dest;
   AllocateArrayNode* alloc = NULL;  // used for zeroing, if needed
-  bool  must_clear_dest    = false;
+  bool  dest_uninitialized = false;
 
   // See if this is the initialization of a newly-allocated array.
   // If so, we will take responsibility here for initializing it to zero.
@@ -4587,12 +4595,14 @@
     adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
     // From this point on, every exit path is responsible for
     // initializing any non-copied parts of the object to zero.
-    must_clear_dest = true;
+    // Also, if this flag is set we make sure that arraycopy interacts properly
+    // with G1, eliding pre-barriers. See CR 6627983.
+    dest_uninitialized = true;
   } else {
     // No zeroing elimination here.
     alloc             = NULL;
     //original_dest   = dest;
-    //must_clear_dest = false;
+    //dest_uninitialized = false;
   }
 
   // Results are placed here:
@@ -4624,10 +4634,10 @@
   Node* checked_value   = NULL;
 
   if (basic_elem_type == T_CONFLICT) {
-    assert(!must_clear_dest, "");
+    assert(!dest_uninitialized, "");
     Node* cv = generate_generic_arraycopy(adr_type,
                                           src, src_offset, dest, dest_offset,
-                                          copy_length);
+                                          copy_length, dest_uninitialized);
     if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
     checked_control = control();
     checked_i_o     = i_o();
@@ -4647,7 +4657,7 @@
     }
 
     // copy_length is 0.
-    if (!stopped() && must_clear_dest) {
+    if (!stopped() && dest_uninitialized) {
       Node* dest_length = alloc->in(AllocateNode::ALength);
       if (_gvn.eqv_uncast(copy_length, dest_length)
           || _gvn.find_int_con(dest_length, 1) <= 0) {
@@ -4673,7 +4683,7 @@
     result_memory->init_req(zero_path, memory(adr_type));
   }
 
-  if (!stopped() && must_clear_dest) {
+  if (!stopped() && dest_uninitialized) {
     // We have to initialize the *uncopied* part of the array to zero.
     // The copy destination is the slice dest[off..off+len].  The other slices
     // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
@@ -4709,7 +4719,7 @@
       { PreserveJVMState pjvms(this);
         didit = generate_block_arraycopy(adr_type, basic_elem_type, alloc,
                                          src, src_offset, dest, dest_offset,
-                                         dest_size);
+                                         dest_size, dest_uninitialized);
         if (didit) {
           // Present the results of the block-copying fast call.
           result_region->init_req(bcopy_path, control());
@@ -4785,7 +4795,7 @@
       Node* cv = generate_checkcast_arraycopy(adr_type,
                                               dest_elem_klass,
                                               src, src_offset, dest, dest_offset,
-                                              ConvI2X(copy_length));
+                                              ConvI2X(copy_length), dest_uninitialized);
       if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
       checked_control = control();
       checked_i_o     = i_o();
@@ -4808,7 +4818,7 @@
     PreserveJVMState pjvms(this);
     generate_unchecked_arraycopy(adr_type, copy_type, disjoint_bases,
                                  src, src_offset, dest, dest_offset,
-                                 ConvI2X(copy_length));
+                                 ConvI2X(copy_length), dest_uninitialized);
 
     // Present the results of the fast call.
     result_region->init_req(fast_path, control());
@@ -4887,7 +4897,7 @@
     set_memory(slow_mem, adr_type);
     set_i_o(slow_i_o);
 
-    if (must_clear_dest) {
+    if (dest_uninitialized) {
       generate_clear_array(adr_type, dest, basic_elem_type,
                            intcon(0), NULL,
                            alloc->in(AllocateNode::AllocSize));
@@ -4895,7 +4905,7 @@
 
     generate_slow_arraycopy(adr_type,
                             src, src_offset, dest, dest_offset,
-                            copy_length);
+                            copy_length, /*dest_uninitialized*/false);
 
     result_region->init_req(slow_call_path, control());
     result_i_o   ->init_req(slow_call_path, i_o());
@@ -5139,7 +5149,7 @@
                                          AllocateNode* alloc,
                                          Node* src,  Node* src_offset,
                                          Node* dest, Node* dest_offset,
-                                         Node* dest_size) {
+                                         Node* dest_size, bool dest_uninitialized) {
   // See if there is an advantage from block transfer.
   int scale = exact_log2(type2aelembytes(basic_elem_type));
   if (scale >= LogBytesPerLong)
@@ -5184,7 +5194,7 @@
 
   bool disjoint_bases = true;   // since alloc != NULL
   generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
-                               sptr, NULL, dptr, NULL, countx);
+                               sptr, NULL, dptr, NULL, countx, dest_uninitialized);
 
   return true;
 }
@@ -5197,7 +5207,8 @@
 LibraryCallKit::generate_slow_arraycopy(const TypePtr* adr_type,
                                         Node* src,  Node* src_offset,
                                         Node* dest, Node* dest_offset,
-                                        Node* copy_length) {
+                                        Node* copy_length, bool dest_uninitialized) {
+  assert(!dest_uninitialized, "Invariant");
   Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
                                  OptoRuntime::slow_arraycopy_Type(),
                                  OptoRuntime::slow_arraycopy_Java(),
@@ -5215,10 +5226,10 @@
                                              Node* dest_elem_klass,
                                              Node* src,  Node* src_offset,
                                              Node* dest, Node* dest_offset,
-                                             Node* copy_length) {
+                                             Node* copy_length, bool dest_uninitialized) {
   if (stopped())  return NULL;
 
-  address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+  address copyfunc_addr = StubRoutines::checkcast_arraycopy(dest_uninitialized);
   if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
     return NULL;
   }
@@ -5256,9 +5267,9 @@
 LibraryCallKit::generate_generic_arraycopy(const TypePtr* adr_type,
                                            Node* src,  Node* src_offset,
                                            Node* dest, Node* dest_offset,
-                                           Node* copy_length) {
+                                           Node* copy_length, bool dest_uninitialized) {
+  assert(!dest_uninitialized, "Invariant");
   if (stopped())  return NULL;
-
   address copyfunc_addr = StubRoutines::generic_arraycopy();
   if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
     return NULL;
@@ -5279,7 +5290,7 @@
                                              bool disjoint_bases,
                                              Node* src,  Node* src_offset,
                                              Node* dest, Node* dest_offset,
-                                             Node* copy_length) {
+                                             Node* copy_length, bool dest_uninitialized) {
   if (stopped())  return;               // nothing to do
 
   Node* src_start  = src;
@@ -5294,7 +5305,7 @@
   const char* copyfunc_name = "arraycopy";
   address     copyfunc_addr =
       basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
-                          disjoint_bases, copyfunc_name);
+                          disjoint_bases, copyfunc_name, dest_uninitialized);
 
   // Call it.  Note that the count_ix value is not scaled to a byte-size.
   make_runtime_call(RC_LEAF|RC_NO_FP,
--- a/src/share/vm/runtime/arguments.cpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Tue Mar 01 14:56:48 2011 -0800
@@ -2790,10 +2790,6 @@
   if (!FLAG_IS_DEFAULT(OptoLoopAlignment) && FLAG_IS_DEFAULT(MaxLoopPad)) {
     FLAG_SET_DEFAULT(MaxLoopPad, OptoLoopAlignment-1);
   }
-  // Temporary disable bulk zeroing reduction with G1. See CR 6627983.
-  if (UseG1GC) {
-    FLAG_SET_DEFAULT(ReduceBulkZeroing, false);
-  }
 #endif
 
   // If we are running in a headless jre, force java.awt.headless property
--- a/src/share/vm/runtime/stubRoutines.cpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/share/vm/runtime/stubRoutines.cpp	Tue Mar 01 14:56:48 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -80,30 +80,36 @@
 jint    StubRoutines::_fpu_subnormal_bias2[3]                   = { 0, 0, 0 };
 
 // Compiled code entry points default values
-// The dafault functions don't have separate disjoint versions.
+// The default functions don't have separate disjoint versions.
 address StubRoutines::_jbyte_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jbyte_copy);
 address StubRoutines::_jshort_arraycopy         = CAST_FROM_FN_PTR(address, StubRoutines::jshort_copy);
 address StubRoutines::_jint_arraycopy           = CAST_FROM_FN_PTR(address, StubRoutines::jint_copy);
 address StubRoutines::_jlong_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jlong_copy);
 address StubRoutines::_oop_arraycopy            = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy);
+address StubRoutines::_oop_arraycopy_uninit     = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy_uninit);
 address StubRoutines::_jbyte_disjoint_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jbyte_copy);
 address StubRoutines::_jshort_disjoint_arraycopy         = CAST_FROM_FN_PTR(address, StubRoutines::jshort_copy);
 address StubRoutines::_jint_disjoint_arraycopy           = CAST_FROM_FN_PTR(address, StubRoutines::jint_copy);
 address StubRoutines::_jlong_disjoint_arraycopy          = CAST_FROM_FN_PTR(address, StubRoutines::jlong_copy);
 address StubRoutines::_oop_disjoint_arraycopy            = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy);
+address StubRoutines::_oop_disjoint_arraycopy_uninit     = CAST_FROM_FN_PTR(address, StubRoutines::oop_copy_uninit);
 
 address StubRoutines::_arrayof_jbyte_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jbyte_copy);
 address StubRoutines::_arrayof_jshort_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jshort_copy);
 address StubRoutines::_arrayof_jint_arraycopy   = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jint_copy);
 address StubRoutines::_arrayof_jlong_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jlong_copy);
 address StubRoutines::_arrayof_oop_arraycopy    = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
+address StubRoutines::_arrayof_oop_arraycopy_uninit      = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
 address StubRoutines::_arrayof_jbyte_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jbyte_copy);
 address StubRoutines::_arrayof_jshort_disjoint_arraycopy = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jshort_copy);
 address StubRoutines::_arrayof_jint_disjoint_arraycopy   = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jint_copy);
 address StubRoutines::_arrayof_jlong_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_jlong_copy);
-address StubRoutines::_arrayof_oop_disjoint_arraycopy  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
+address StubRoutines::_arrayof_oop_disjoint_arraycopy    = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy);
+address StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit  = CAST_FROM_FN_PTR(address, StubRoutines::arrayof_oop_copy_uninit);
+
 
 address StubRoutines::_checkcast_arraycopy               = NULL;
+address StubRoutines::_checkcast_arraycopy_uninit        = NULL;
 address StubRoutines::_unsafe_arraycopy                  = NULL;
 address StubRoutines::_generic_arraycopy                 = NULL;
 
@@ -282,12 +288,12 @@
 // Default versions of arraycopy functions
 //
 
-static void gen_arraycopy_barrier_pre(oop* dest, size_t count) {
+static void gen_arraycopy_barrier_pre(oop* dest, size_t count, bool dest_uninitialized) {
     assert(count != 0, "count should be non-zero");
     assert(count <= (size_t)max_intx, "count too large");
     BarrierSet* bs = Universe::heap()->barrier_set();
     assert(bs->has_write_ref_array_pre_opt(), "Must have pre-barrier opt");
-    bs->write_ref_array_pre(dest, (int)count);
+    bs->write_ref_array_pre(dest, (int)count, dest_uninitialized);
 }
 
 static void gen_arraycopy_barrier(oop* dest, size_t count) {
@@ -330,7 +336,17 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre(dest, count);
+  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/false);
+  Copy::conjoint_oops_atomic(src, dest, count);
+  gen_arraycopy_barrier(dest, count);
+JRT_END
+
+JRT_LEAF(void, StubRoutines::oop_copy_uninit(oop* src, oop* dest, size_t count))
+#ifndef PRODUCT
+  SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
+#endif // !PRODUCT
+  assert(count != 0, "count should be non-zero");
+  gen_arraycopy_barrier_pre(dest, count, /*dest_uninitialized*/true);
   Copy::conjoint_oops_atomic(src, dest, count);
   gen_arraycopy_barrier(dest, count);
 JRT_END
@@ -368,11 +384,20 @@
   SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
 #endif // !PRODUCT
   assert(count != 0, "count should be non-zero");
-  gen_arraycopy_barrier_pre((oop *) dest, count);
+  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/false);
   Copy::arrayof_conjoint_oops(src, dest, count);
   gen_arraycopy_barrier((oop *) dest, count);
 JRT_END
 
+JRT_LEAF(void, StubRoutines::arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count))
+#ifndef PRODUCT
+  SharedRuntime::_oop_array_copy_ctr++;        // Slow-path oop array copy
+#endif // !PRODUCT
+  assert(count != 0, "count should be non-zero");
+  gen_arraycopy_barrier_pre((oop *) dest, count, /*dest_uninitialized*/true);
+  Copy::arrayof_conjoint_oops(src, dest, count);
+  gen_arraycopy_barrier((oop *) dest, count);
+JRT_END
 
 address StubRoutines::select_fill_function(BasicType t, bool aligned, const char* &name) {
 #define RETURN_STUB(xxx_fill) { \
--- a/src/share/vm/runtime/stubRoutines.hpp	Tue Mar 01 10:27:15 2011 -0800
+++ b/src/share/vm/runtime/stubRoutines.hpp	Tue Mar 01 14:56:48 2011 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -164,12 +164,12 @@
   static address _jshort_arraycopy;
   static address _jint_arraycopy;
   static address _jlong_arraycopy;
-  static address _oop_arraycopy;
+  static address _oop_arraycopy, _oop_arraycopy_uninit;
   static address _jbyte_disjoint_arraycopy;
   static address _jshort_disjoint_arraycopy;
   static address _jint_disjoint_arraycopy;
   static address _jlong_disjoint_arraycopy;
-  static address _oop_disjoint_arraycopy;
+  static address _oop_disjoint_arraycopy, _oop_disjoint_arraycopy_uninit;
 
   // arraycopy operands aligned on zero'th element boundary
   // These are identical to the ones aligned aligned on an
@@ -179,15 +179,15 @@
   static address _arrayof_jshort_arraycopy;
   static address _arrayof_jint_arraycopy;
   static address _arrayof_jlong_arraycopy;
-  static address _arrayof_oop_arraycopy;
+  static address _arrayof_oop_arraycopy, _arrayof_oop_arraycopy_uninit;
   static address _arrayof_jbyte_disjoint_arraycopy;
   static address _arrayof_jshort_disjoint_arraycopy;
   static address _arrayof_jint_disjoint_arraycopy;
   static address _arrayof_jlong_disjoint_arraycopy;
-  static address _arrayof_oop_disjoint_arraycopy;
+  static address _arrayof_oop_disjoint_arraycopy, _arrayof_oop_disjoint_arraycopy_uninit;
 
   // these are recommended but optional:
-  static address _checkcast_arraycopy;
+  static address _checkcast_arraycopy, _checkcast_arraycopy_uninit;
   static address _unsafe_arraycopy;
   static address _generic_arraycopy;
 
@@ -286,26 +286,36 @@
   static address jshort_arraycopy() { return _jshort_arraycopy; }
   static address jint_arraycopy()   { return _jint_arraycopy; }
   static address jlong_arraycopy()  { return _jlong_arraycopy; }
-  static address oop_arraycopy()    { return _oop_arraycopy; }
+  static address oop_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _oop_arraycopy_uninit : _oop_arraycopy;
+  }
   static address jbyte_disjoint_arraycopy()  { return _jbyte_disjoint_arraycopy; }
   static address jshort_disjoint_arraycopy() { return _jshort_disjoint_arraycopy; }
   static address jint_disjoint_arraycopy()   { return _jint_disjoint_arraycopy; }
   static address jlong_disjoint_arraycopy()  { return _jlong_disjoint_arraycopy; }
-  static address oop_disjoint_arraycopy()    { return _oop_disjoint_arraycopy; }
+  static address oop_disjoint_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ?  _oop_disjoint_arraycopy_uninit : _oop_disjoint_arraycopy;
+  }
 
   static address arrayof_jbyte_arraycopy()  { return _arrayof_jbyte_arraycopy; }
   static address arrayof_jshort_arraycopy() { return _arrayof_jshort_arraycopy; }
   static address arrayof_jint_arraycopy()   { return _arrayof_jint_arraycopy; }
   static address arrayof_jlong_arraycopy()  { return _arrayof_jlong_arraycopy; }
-  static address arrayof_oop_arraycopy()    { return _arrayof_oop_arraycopy; }
+  static address arrayof_oop_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _arrayof_oop_arraycopy_uninit : _arrayof_oop_arraycopy;
+  }
 
   static address arrayof_jbyte_disjoint_arraycopy()  { return _arrayof_jbyte_disjoint_arraycopy; }
   static address arrayof_jshort_disjoint_arraycopy() { return _arrayof_jshort_disjoint_arraycopy; }
   static address arrayof_jint_disjoint_arraycopy()   { return _arrayof_jint_disjoint_arraycopy; }
   static address arrayof_jlong_disjoint_arraycopy()  { return _arrayof_jlong_disjoint_arraycopy; }
-  static address arrayof_oop_disjoint_arraycopy()    { return _arrayof_oop_disjoint_arraycopy; }
+  static address arrayof_oop_disjoint_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _arrayof_oop_disjoint_arraycopy_uninit : _arrayof_oop_disjoint_arraycopy;
+  }
 
-  static address checkcast_arraycopy()     { return _checkcast_arraycopy; }
+  static address checkcast_arraycopy(bool dest_uninitialized = false) {
+    return dest_uninitialized ? _checkcast_arraycopy_uninit : _checkcast_arraycopy;
+  }
   static address unsafe_arraycopy()        { return _unsafe_arraycopy; }
   static address generic_arraycopy()       { return _generic_arraycopy; }
 
@@ -352,17 +362,19 @@
   // Default versions of the above arraycopy functions for platforms which do
   // not have specialized versions
   //
-  static void jbyte_copy (jbyte*  src, jbyte*  dest, size_t count);
-  static void jshort_copy(jshort* src, jshort* dest, size_t count);
-  static void jint_copy  (jint*   src, jint*   dest, size_t count);
-  static void jlong_copy (jlong*  src, jlong*  dest, size_t count);
-  static void oop_copy   (oop*    src, oop*    dest, size_t count);
+  static void jbyte_copy     (jbyte*  src, jbyte*  dest, size_t count);
+  static void jshort_copy    (jshort* src, jshort* dest, size_t count);
+  static void jint_copy      (jint*   src, jint*   dest, size_t count);
+  static void jlong_copy     (jlong*  src, jlong*  dest, size_t count);
+  static void oop_copy       (oop*    src, oop*    dest, size_t count);
+  static void oop_copy_uninit(oop*    src, oop*    dest, size_t count);
 
-  static void arrayof_jbyte_copy (HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_jshort_copy(HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_jint_copy  (HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_jlong_copy (HeapWord* src, HeapWord* dest, size_t count);
-  static void arrayof_oop_copy   (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jbyte_copy     (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jshort_copy    (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jint_copy      (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_jlong_copy     (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_oop_copy       (HeapWord* src, HeapWord* dest, size_t count);
+  static void arrayof_oop_copy_uninit(HeapWord* src, HeapWord* dest, size_t count);
 };
 
 #endif // SHARE_VM_RUNTIME_STUBROUTINES_HPP