changeset 12835:69944b868a32

8014555: G1: Memory ordering problem with Conc refinement and card marking Summary: Add a StoreLoad barrier in the G1 post-barrier to fix a race with concurrent refinement. Also-reviewed-by: martin.doerr@sap.com Reviewed-by: iveresov, tschatzl, brutisso, roland, kvn
author mgerdin
date Tue, 08 Oct 2013 17:35:51 +0200
parents 04b18a42c2f3
children b4d8a3d4db73
files src/cpu/sparc/vm/c1_Runtime1_sparc.cpp src/cpu/sparc/vm/macroAssembler_sparc.cpp src/cpu/x86/vm/c1_Runtime1_x86.cpp src/cpu/x86/vm/macroAssembler_x86.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp src/share/vm/gc_implementation/g1/ptrQueue.hpp src/share/vm/opto/graphKit.cpp
diffstat 10 files changed, 118 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Tue Oct 08 17:35:51 2013 +0200
@@ -37,6 +37,9 @@
 #include "runtime/vframeArray.hpp"
 #include "utilities/macros.hpp"
 #include "vmreg_sparc.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
 
 // Implementation of StubAssembler
 
@@ -912,7 +915,7 @@
         Register tmp2 = G3_scratch;
         jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
 
-        Label not_already_dirty, restart, refill;
+        Label not_already_dirty, restart, refill, young_card;
 
 #ifdef _LP64
         __ srlx(addr, CardTableModRefBS::card_shift, addr);
@@ -924,9 +927,15 @@
         __ set(rs, cardtable);         // cardtable := <card table base>
         __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
 
+        __ cmp_and_br_short(tmp, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
+
+        __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+        __ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
+
         assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
         __ cmp_and_br_short(tmp, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
 
+        __ bind(young_card);
         // We didn't take the branch, so we're already dirty: return.
         // Use return-from-leaf
         __ retl();
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Tue Oct 08 17:35:51 2013 +0200
@@ -3752,7 +3752,7 @@
 #define __ masm.
   address start = __ pc();
 
-  Label not_already_dirty, restart, refill;
+  Label not_already_dirty, restart, refill, young_card;
 
 #ifdef _LP64
   __ srlx(O0, CardTableModRefBS::card_shift, O0);
@@ -3763,9 +3763,15 @@
   __ set(addrlit, O1); // O1 := <card table base>
   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
 
+  __ cmp_and_br_short(O2, G1SATBCardTableModRefBS::g1_young_card_val(), Assembler::equal, Assembler::pt, young_card);
+
+  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
+
   assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
   __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
 
+  __ bind(young_card);
   // We didn't take the branch, so we're already dirty: return.
   // Use return-from-leaf
   __ retl();
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Tue Oct 08 17:35:51 2013 +0200
@@ -38,6 +38,9 @@
 #include "runtime/vframeArray.hpp"
 #include "utilities/macros.hpp"
 #include "vmreg_x86.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
 
 
 // Implementation of StubAssembler
@@ -1753,13 +1756,17 @@
         __ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index)));
 #endif
 
-        __ cmpb(Address(card_addr, 0), 0);
+        __ cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val());
+        __ jcc(Assembler::equal, done);
+
+        __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+        __ cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
         __ jcc(Assembler::equal, done);
 
         // storing region crossing non-NULL, card is clean.
         // dirty card and log.
 
-        __ movb(Address(card_addr, 0), 0);
+        __ movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
 
         __ cmpl(queue_index, 0);
         __ jcc(Assembler::equal, runtime);
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Tue Oct 08 17:35:51 2013 +0200
@@ -3389,13 +3389,18 @@
   const Register card_addr = tmp;
   lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
 #endif
-  cmpb(Address(card_addr, 0), 0);
+  cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val());
   jcc(Assembler::equal, done);
 
+  membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+  cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
+  jcc(Assembler::equal, done);
+
+
   // storing a region crossing, non-NULL oop, card is clean.
   // dirty card and log.
 
-  movb(Address(card_addr, 0), 0);
+  movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
 
   cmpl(queue_index, 0);
   jcc(Assembler::equal, runtime);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Oct 08 17:35:51 2013 +0200
@@ -6035,7 +6035,11 @@
   // is dirty.
   G1SATBCardTableModRefBS* ct_bs = g1_barrier_set();
   MemRegion mr(hr->bottom(), hr->pre_dummy_top());
-  ct_bs->verify_dirty_region(mr);
+  if (hr->is_young()) {
+    ct_bs->verify_g1_young_region(mr);
+  } else {
+    ct_bs->verify_dirty_region(mr);
+  }
 }
 
 void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Tue Oct 08 17:35:51 2013 +0200
@@ -29,6 +29,7 @@
 #include "gc_implementation/g1/g1CollectedHeap.hpp"
 #include "gc_implementation/g1/g1AllocRegion.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "utilities/taskqueue.hpp"
 
@@ -134,7 +135,7 @@
   assert(containing_hr->is_in(end - 1), "it should also contain end - 1");
 
   MemRegion mr(start, end);
-  g1_barrier_set()->dirty(mr);
+  g1_barrier_set()->g1_mark_as_young(mr);
 }
 
 inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const {
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Tue Oct 08 17:35:51 2013 +0200
@@ -70,6 +70,12 @@
   if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) {
     return false;
   }
+
+  if  (val == g1_young_gen) {
+    // the card is for a young gen region. We don't need to keep track of all pointers into young
+    return false;
+  }
+
   // Cached bit can be installed either on a clean card or on a claimed card.
   jbyte new_val = val;
   if (val == clean_card_val()) {
@@ -85,6 +91,19 @@
   return true;
 }
 
+void G1SATBCardTableModRefBS::g1_mark_as_young(const MemRegion& mr) {
+  jbyte *const first = byte_for(mr.start());
+  jbyte *const last = byte_after(mr.last());
+
+  memset(first, g1_young_gen, last - first);
+}
+
+#ifndef PRODUCT
+void G1SATBCardTableModRefBS::verify_g1_young_region(MemRegion mr) {
+  verify_region(mr, g1_young_gen,  true);
+}
+#endif
+
 G1SATBCardTableLoggingModRefBS::
 G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
                                int max_covered_regions) :
@@ -97,7 +116,11 @@
 void
 G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
                                                      oop new_val) {
-  jbyte* byte = byte_for(field);
+  volatile jbyte* byte = byte_for(field);
+  if (*byte == g1_young_gen) {
+    return;
+  }
+  OrderAccess::storeload();
   if (*byte != dirty_card) {
     *byte = dirty_card;
     Thread* thr = Thread::current();
@@ -129,7 +152,7 @@
 
 void
 G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) {
-  jbyte* byte = byte_for(mr.start());
+  volatile jbyte* byte = byte_for(mr.start());
   jbyte* last_byte = byte_for(mr.last());
   Thread* thr = Thread::current();
   if (whole_heap) {
@@ -138,25 +161,35 @@
       byte++;
     }
   } else {
-    // Enqueue if necessary.
-    if (thr->is_Java_thread()) {
-      JavaThread* jt = (JavaThread*)thr;
-      while (byte <= last_byte) {
-        if (*byte != dirty_card) {
-          *byte = dirty_card;
-          jt->dirty_card_queue().enqueue(byte);
+    // skip all consecutive young cards
+    for (; byte <= last_byte && *byte == g1_young_gen; byte++);
+
+    if (byte <= last_byte) {
+      OrderAccess::storeload();
+      // Enqueue if necessary.
+      if (thr->is_Java_thread()) {
+        JavaThread* jt = (JavaThread*)thr;
+        for (; byte <= last_byte; byte++) {
+          if (*byte == g1_young_gen) {
+            continue;
+          }
+          if (*byte != dirty_card) {
+            *byte = dirty_card;
+            jt->dirty_card_queue().enqueue(byte);
+          }
         }
-        byte++;
-      }
-    } else {
-      MutexLockerEx x(Shared_DirtyCardQ_lock,
-                      Mutex::_no_safepoint_check_flag);
-      while (byte <= last_byte) {
-        if (*byte != dirty_card) {
-          *byte = dirty_card;
-          _dcqs.shared_dirty_card_queue()->enqueue(byte);
+      } else {
+        MutexLockerEx x(Shared_DirtyCardQ_lock,
+                        Mutex::_no_safepoint_check_flag);
+        for (; byte <= last_byte; byte++) {
+          if (*byte == g1_young_gen) {
+            continue;
+          }
+          if (*byte != dirty_card) {
+            *byte = dirty_card;
+            _dcqs.shared_dirty_card_queue()->enqueue(byte);
+          }
         }
-        byte++;
       }
     }
   }
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Tue Oct 08 17:35:51 2013 +0200
@@ -38,7 +38,14 @@
 // snapshot-at-the-beginning marking.
 
 class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
+protected:
+  enum G1CardValues {
+    g1_young_gen = CT_MR_BS_last_reserved << 1
+  };
+
 public:
+  static int g1_young_card_val()   { return g1_young_gen; }
+
   // Add "pre_val" to a set of objects that may have been disconnected from the
   // pre-marking object graph.
   static void enqueue(oop pre_val);
@@ -118,6 +125,9 @@
       _byte_map[card_index] = val;
   }
 
+  void verify_g1_young_region(MemRegion mr) PRODUCT_RETURN;
+  void g1_mark_as_young(const MemRegion& mr);
+
   bool mark_card_deferred(size_t card_index);
 
   bool is_card_deferred(size_t card_index) {
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Tue Oct 08 17:35:51 2013 +0200
@@ -80,6 +80,10 @@
 
   void reset() { if (_buf != NULL) _index = _sz; }
 
+  void enqueue(volatile void* ptr) {
+    enqueue((void*)(ptr));
+  }
+
   // Enqueues the given "obj".
   void enqueue(void* ptr) {
     if (!_active) return;
--- a/src/share/vm/opto/graphKit.cpp	Fri Oct 04 13:33:02 2013 +0200
+++ b/src/share/vm/opto/graphKit.cpp	Tue Oct 08 17:35:51 2013 +0200
@@ -3713,7 +3713,8 @@
   Node* no_base = __ top();
   float likely  = PROB_LIKELY(0.999);
   float unlikely  = PROB_UNLIKELY(0.999);
-  Node* zero = __ ConI(0);
+  Node* young_card = __ ConI((jint)G1SATBCardTableModRefBS::g1_young_card_val());
+  Node* dirty_card = __ ConI((jint)CardTableModRefBS::dirty_card_val());
   Node* zeroX = __ ConX(0);
 
   // Get the alias_index for raw card-mark memory
@@ -3769,8 +3770,16 @@
         // load the original value of the card
         Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
 
-        __ if_then(card_val, BoolTest::ne, zero); {
-          g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
+        __ if_then(card_val, BoolTest::ne, young_card); {
+          sync_kit(ideal);
+          // Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier.
+          insert_mem_bar(Op_MemBarVolatile, oop_store);
+          __ sync_kit(this);
+
+          Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
+          __ if_then(card_val_reload, BoolTest::ne, dirty_card); {
+            g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
+          } __ end_if();
         } __ end_if();
       } __ end_if();
     } __ end_if();