# HG changeset patch
# User goetz
# Date 1389878751 -3600
# Node ID 3514ee402842eeb8f879dcbc6a610ff29822138e
# Parent  c668f307a4c0c36d8b9fd587f7133a8c29f4d5af
8029101: PPC64 (part 211): ordering of Independent Reads of Independent Writes
Reviewed-by: dholmes, kvn
Contributed-by: martin.doerr@sap.com

diff -r c668f307a4c0 -r 3514ee402842 src/cpu/ppc/vm/globalDefinitions_ppc.hpp
--- a/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Thu Jan 16 14:25:51 2014 +0100
@@ -37,4 +37,7 @@
 // signatures accordingly.
 const bool CCallingConventionRequiresIntsAsLongs = true;
 
+// The PPC CPUs are NOT multiple-copy-atomic.
+#define CPU_NOT_MULTIPLE_COPY_ATOMIC
+
 #endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
diff -r c668f307a4c0 -r 3514ee402842 src/share/vm/interpreter/bytecodeInterpreter.cpp
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -2034,6 +2034,9 @@
           TosState tos_type = cache->flag_state();
           int field_offset = cache->f2_as_index();
           if (cache->is_volatile()) {
+            if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+              OrderAccess::fence();
+            }
             if (tos_type == atos) {
               VERIFY_OOP(obj->obj_field_acquire(field_offset));
               SET_STACK_OBJECT(obj->obj_field_acquire(field_offset), -1);
diff -r c668f307a4c0 -r 3514ee402842 src/share/vm/opto/library_call.cpp
--- a/src/share/vm/opto/library_call.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/share/vm/opto/library_call.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -2627,8 +2627,13 @@
     // rough approximation of type.
     need_mem_bar = true;
     // For Stores, place a memory ordering barrier now.
-    if (is_store)
+    if (is_store) {
       insert_mem_bar(Op_MemBarRelease);
+    } else {
+      if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
 
   // Memory barrier to prevent normal and 'unsafe' accesses from
@@ -2717,10 +2722,13 @@
   }
 
   if (is_volatile) {
-    if (!is_store)
+    if (!is_store) {
       insert_mem_bar(Op_MemBarAcquire);
-    else
-      insert_mem_bar(Op_MemBarVolatile);
+    } else {
+      if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
 
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
diff -r c668f307a4c0 -r 3514ee402842 src/share/vm/opto/parse.hpp
--- a/src/share/vm/opto/parse.hpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/share/vm/opto/parse.hpp	Thu Jan 16 14:25:51 2014 +0100
@@ -330,7 +330,8 @@
   GraphKit _exits;              // Record all normal returns and throws here.
 
   bool _wrote_final;            // Did we write a final field?
-  bool _count_invocations;      // update and test invocation counter
+  bool _wrote_volatile;         // Did we write a volatile field?
+  bool _count_invocations;     // update and test invocation counter
   bool _method_data_update;    // update method data oop
   Node* _alloc_with_final;     // An allocation node with final field
 
@@ -373,6 +374,8 @@
   GraphKit& exits()                   { return _exits; }
   bool wrote_final() const            { return _wrote_final; }
   void set_wrote_final(bool z)        { _wrote_final = z; }
+  bool wrote_volatile() const         { return _wrote_volatile; }
+  void set_wrote_volatile(bool z)     { _wrote_volatile = z; }
   bool count_invocations() const      { return _count_invocations; }
   bool method_data_update() const     { return _method_data_update; }
   Node* alloc_with_final() const      { return _alloc_with_final; }
diff -r c668f307a4c0 -r 3514ee402842 src/share/vm/opto/parse1.cpp
--- a/src/share/vm/opto/parse1.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/share/vm/opto/parse1.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -390,6 +390,7 @@
   _expected_uses = expected_uses;
   _depth = 1 + (caller->has_method() ? caller->depth() : 0);
   _wrote_final = false;
+  _wrote_volatile = false;
   _alloc_with_final = NULL;
   _entry_bci = InvocationEntryBci;
   _tf = NULL;
@@ -907,7 +908,13 @@
   Node* iophi = _exits.i_o();
   _exits.set_i_o(gvn().transform(iophi));
 
-  if (wrote_final()) {
+  // On PPC64, also add MemBarRelease for constructors which write
+  // volatile fields. As support_IRIW_for_not_multiple_copy_atomic_cpu
+  // is set on PPC64, no sync instruction is issued after volatile
+  // stores. We want to guarantee the same behaviour as on platforms
+  // with total store order, although this is not required by the Java
+  // memory model. So as with finals, we add a barrier here.
+  if (wrote_final() PPC64_ONLY(|| (wrote_volatile() && method()->is_initializer()))) {
     // This method (which must be a constructor by the rules of Java)
     // wrote a final.  The effects of all initializations must be
     // committed to memory before any code after the constructor
diff -r c668f307a4c0 -r 3514ee402842 src/share/vm/opto/parse3.cpp
--- a/src/share/vm/opto/parse3.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/share/vm/opto/parse3.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -227,6 +227,9 @@
   } else {
     type = Type::get_const_basic_type(bt);
   }
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) {
+    insert_mem_bar(Op_MemBarVolatile);   // StoreLoad barrier
+  }
   // Build the load.
   //
   MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
@@ -317,7 +320,16 @@
   // If reference is volatile, prevent following volatiles ops from
   // floating up before the volatile write.
   if (is_vol) {
-    insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+    // If not multiple copy atomic, we do the MemBarVolatile before the load.
+    if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+    }
+    // Remember we wrote a volatile field.
+    // For not multiple copy atomic cpu (ppc64) a barrier should be issued
+    // in constructors which have such stores. See do_exits() in parse1.cpp.
+    if (is_field) {
+      set_wrote_volatile(true);
+    }
   }
 
   // If the field is final, the rules of Java say we are in <init> or <clinit>.
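The parse1.cpp and parse3.cpp hunks above work together: when support_IRIW_for_not_multiple_copy_atomic_cpu is set, a volatile store no longer gets a trailing MemBarVolatile, so a constructor that writes a volatile field instead gets a MemBarRelease at its exit, just like a constructor that writes a final field. A minimal Java-level sketch of the pattern this covers (the class and field names are illustrative only, not taken from the patch):

    // Illustrative example; each name here is made up for this sketch.
    class Box {
        volatile int value;

        Box(int v) {
            // With this change, PPC64 emits a release barrier at the end of
            // this constructor, so the store to 'value' cannot be reordered
            // with the later publication of the Box reference.
            value = v;
        }
    }

This mirrors the existing handling of final fields in do_exits() and is what the comment about "the same behaviour as on platforms with total store order" refers to.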
diff -r c668f307a4c0 -r 3514ee402842 src/share/vm/prims/unsafe.cpp
--- a/src/share/vm/prims/unsafe.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/share/vm/prims/unsafe.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -162,6 +162,9 @@
 
 #define GET_FIELD_VOLATILE(obj, offset, type_name, v) \
   oop p = JNIHandles::resolve(obj); \
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) { \
+    OrderAccess::fence(); \
+  } \
   volatile type_name v = OrderAccess::load_acquire((volatile type_name*)index_oop_from_field_offset_long(p, offset));
 
 #define SET_FIELD_VOLATILE(obj, offset, type_name, x) \
diff -r c668f307a4c0 -r 3514ee402842 src/share/vm/utilities/globalDefinitions.hpp
--- a/src/share/vm/utilities/globalDefinitions.hpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Thu Jan 16 14:25:51 2014 +0100
@@ -398,6 +398,17 @@
 #define PLATFORM_NATIVE_STACK_WALKING_SUPPORTED 1
 #endif
 
+// To assure the IRIW property on processors that are not multiple copy
+// atomic, sync instructions must be issued between volatile reads to
+// assure their ordering, instead of after volatile stores.
+// (See "A Tutorial Introduction to the ARM and POWER Relaxed Memory Models"
+// by Luc Maranget, Susmit Sarkar and Peter Sewell, INRIA/Cambridge)
+#ifdef CPU_NOT_MULTIPLE_COPY_ATOMIC
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = true;
+#else
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = false;
+#endif
+
 // The byte alignment to be used by Arena::Amalloc.  See bugid 4169348.
 // Note: this value must be a power of 2
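The globalDefinitions.hpp comment above refers to the IRIW ("Independent Reads of Independent Writes") litmus test. Roughly sketched in Java (the class and field names are illustrative only), with each method run on its own thread, the test looks like this:

    // Two writers store to independent volatile fields; two readers read them
    // in opposite orders. With x and y volatile, the Java memory model forbids
    // the outcome r1 == 1, r2 == 0, r3 == 1, r4 == 0, i.e. the two readers
    // must not observe the two independent writes in opposite orders.
    class IRIWExample {
        volatile int x, y;
        int r1, r2, r3, r4;

        void writer1() { x = 1; }
        void writer2() { y = 1; }
        void reader1() { r1 = x; r2 = y; }
        void reader2() { r3 = y; r4 = x; }
    }

On a multiple-copy-atomic CPU a store becomes visible to all other CPUs at the same time, so acquire semantics on each volatile load are enough to rule the forbidden outcome out. On POWER a reader may observe another CPU's store before it is visible to all CPUs, which is why this patch issues a full sync (fence / MemBarVolatile) before volatile reads instead of after volatile stores.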