# HG changeset patch
# User kvn
# Date 1340143976 25200
# Node ID 6f8f439e247d0e8822135e11699caa56305d7b0e
# Parent 765ee2d1674b4d6881b05b6347de47824d880142
7177923: SIGBUS on sparc in compiled code for java.util.Calendar.clear()
Summary: disable vectorization of a memory access with more elements per vector than one which is used for alignment on sparc
Reviewed-by: twisti

diff -r 765ee2d1674b -r 6f8f439e247d src/cpu/x86/vm/x86.ad
--- a/src/cpu/x86/vm/x86.ad	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/cpu/x86/vm/x86.ad	Tue Jun 19 15:12:56 2012 -0700
@@ -2061,7 +2061,7 @@
 // Integer could be loaded into xmm register directly from memory.
 instruct Repl2I_mem(vecD dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateI mem));
+  match(Set dst (ReplicateI (LoadVector mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
   ins_encode %{
@@ -2073,7 +2073,7 @@
 
 instruct Repl4I_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateI mem));
+  match(Set dst (ReplicateI (LoadVector mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
   ins_encode %{
@@ -2085,7 +2085,7 @@
 
 instruct Repl8I_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateI mem));
+  match(Set dst (ReplicateI (LoadVector mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\n\t"
             "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
@@ -2225,7 +2225,7 @@
 // Long could be loaded into xmm register directly from memory.
 instruct Repl2L_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateL mem));
+  match(Set dst (ReplicateL (LoadVector mem)));
   format %{ "movq    $dst,$mem\n\t"
             "movlhps $dst,$dst\t! replicate2L" %}
   ins_encode %{
@@ -2237,7 +2237,7 @@
 
 instruct Repl4L_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL mem));
+  match(Set dst (ReplicateL (LoadVector mem)));
   format %{ "movq    $dst,$mem\n\t"
             "movlhps $dst,$dst\n\t"
             "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
diff -r 765ee2d1674b -r 6f8f439e247d src/share/vm/opto/c2_globals.hpp
--- a/src/share/vm/opto/c2_globals.hpp	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Tue Jun 19 15:12:56 2012 -0700
@@ -299,9 +299,12 @@
   develop(bool, SuperWordRTDepCheck, false,                                 \
           "Enable runtime dependency checks.")                              \
                                                                             \
-  product(bool, TraceSuperWord, false,                                      \
+  notproduct(bool, TraceSuperWord, false,                                   \
           "Trace superword transforms")                                     \
                                                                             \
+  notproduct(bool, TraceNewVectors, false,                                  \
+          "Trace creation of Vector nodes")                                 \
+                                                                            \
   product_pd(bool, OptoBundling,                                            \
           "Generate nops to fill i-cache lines")                            \
                                                                             \
diff -r 765ee2d1674b -r 6f8f439e247d src/share/vm/opto/superword.cpp
--- a/src/share/vm/opto/superword.cpp	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/share/vm/opto/superword.cpp	Tue Jun 19 15:12:56 2012 -0700
@@ -222,7 +222,18 @@
     // Create initial pack pairs of memory operations for which
     // alignment is set and vectors will be aligned.
     bool create_pack = true;
-    if (memory_alignment(mem_ref, best_iv_adjustment) != 0) {
+    if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
+      if (!Matcher::misaligned_vectors_ok()) {
+        int vw = vector_width(mem_ref);
+        int vw_best = vector_width(best_align_to_mem_ref);
+        if (vw > vw_best) {
+          // Do not vectorize a memory access with more elements per vector
+          // if unaligned memory access is not allowed because number of
+          // iterations in pre-loop will be not enough to align it.
+          create_pack = false;
+        }
+      }
+    } else {
       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
         // Can't allow vectorization of unaligned memory accesses with the
         // same type since it could be overlapped accesses to the same array.
@@ -357,7 +368,7 @@
     for (uint j = 0; j < memops.size(); j++) {
       MemNode* s = memops.at(j)->as_Mem();
       if (s->is_Store()) {
-        int vw = vector_width_in_bytes(velt_basic_type(s));
+        int vw = vector_width_in_bytes(s);
         assert(vw > 1, "sanity");
         SWPointer p(s, this);
         if (cmp_ct.at(j) > max_ct ||
@@ -380,7 +391,7 @@
       for (uint j = 0; j < memops.size(); j++) {
         MemNode* s = memops.at(j)->as_Mem();
         if (s->is_Load()) {
-          int vw = vector_width_in_bytes(velt_basic_type(s));
+          int vw = vector_width_in_bytes(s);
           assert(vw > 1, "sanity");
           SWPointer p(s, this);
           if (cmp_ct.at(j) > max_ct ||
@@ -440,8 +451,7 @@
 
   // If initial offset from start of object is computable,
   // compute alignment within the vector.
-  BasicType bt = velt_basic_type(p.mem());
-  int vw = vector_width_in_bytes(bt);
+  int vw = vector_width_in_bytes(p.mem());
   assert(vw > 1, "sanity");
   if (vw % span == 0) {
     Node* init_nd = pre_end->init_trip();
@@ -468,8 +478,7 @@
   SWPointer align_to_ref_p(mem_ref, this);
   int offset = align_to_ref_p.offset_in_bytes();
   int scale  = align_to_ref_p.scale_in_bytes();
-  BasicType bt = velt_basic_type(mem_ref);
-  int vw       = vector_width_in_bytes(bt);
+  int vw       = vector_width_in_bytes(mem_ref);
   assert(vw > 1, "sanity");
   int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;
   int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
@@ -1361,7 +1370,7 @@
       }
       _igvn._worklist.push(vn);
 #ifdef ASSERT
-      if (TraceSuperWord) {
+      if (TraceNewVectors) {
        tty->print("new Vector node: ");
        vn->dump();
       }
@@ -1401,7 +1410,7 @@
     _phase->_igvn.register_new_node_with_optimizer(vn);
     _phase->set_ctrl(vn, _phase->get_ctrl(opd));
 #ifdef ASSERT
-    if (TraceSuperWord) {
+    if (TraceNewVectors) {
       tty->print("new Vector node: ");
       vn->dump();
     }
@@ -1424,8 +1433,8 @@
   _phase->_igvn.register_new_node_with_optimizer(pk);
   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
 #ifdef ASSERT
-  if (TraceSuperWord) {
-    tty->print("new Pack node: ");
+  if (TraceNewVectors) {
+    tty->print("new Vector node: ");
     pk->dump();
   }
 #endif
@@ -1764,7 +1773,7 @@
   if (!p.valid()) {
     return bottom_align;
   }
-  int vw = vector_width_in_bytes(velt_basic_type(s));
+  int vw = vector_width_in_bytes(s);
   if (vw < 2) {
     return bottom_align; // No vectors for this type
   }
@@ -1978,12 +1987,12 @@
   //     N = (V - (e - lim0)) % V
   //     lim = lim0 - (V - (e - lim0)) % V
 
-  int vw = vector_width_in_bytes(velt_basic_type(align_to_ref));
-  assert(vw > 1, "sanity");
+  int vw = vector_width_in_bytes(align_to_ref);
   int stride   = iv_stride();
   int scale    = align_to_ref_p.scale_in_bytes();
   int elt_size = align_to_ref_p.memory_size();
   int v_align  = vw / elt_size;
+  assert(v_align > 1, "sanity");
   int k        = align_to_ref_p.offset_in_bytes() / elt_size;
 
   Node *kn   = _igvn.intcon(k);
diff -r 765ee2d1674b -r 6f8f439e247d src/share/vm/opto/superword.hpp
--- a/src/share/vm/opto/superword.hpp	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/share/vm/opto/superword.hpp	Tue Jun 19 15:12:56 2012 -0700
@@ -264,11 +264,14 @@
     _iv = lp->as_CountedLoop()->phi()->as_Phi();
   }
   int      iv_stride()         { return lp()->as_CountedLoop()->stride_con(); }
-  int vector_width_in_bytes(BasicType bt) {
-    return MIN2(ABS(iv_stride())*type2aelembytes(bt),
-                Matcher::vector_width_in_bytes(bt));
+  int vector_width(Node* n) {
+    BasicType bt = velt_basic_type(n);
+    return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt));
   }
-
+  int vector_width_in_bytes(Node* n) {
+    BasicType bt = velt_basic_type(n);
+    return vector_width(n)*type2aelembytes(bt);
+  }
   MemNode* align_to_ref()            { return _align_to_ref; }
   void  set_align_to_ref(MemNode* m) { _align_to_ref = m; }
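
Illustration of the superword.cpp change above (not part of the changeset): the pre-loop trip count is chosen so that the reference returned by find_align_to_ref() becomes vector-aligned, and that guarantee does not carry over to a reference that packs more elements per vector. The standalone C++ sketch below uses assumed numbers (a 16-byte machine vector, an int reference as the alignment target, a byte reference as the candidate pack) and a simplified pre-loop formula that ignores the base offset, scale and stride sign handled by SWPointer and get_iv_adjustment(). It shows that an index forced to a multiple of 4 by the pre-loop is usually not a multiple of 16, so the wider access would remain misaligned at runtime; on sparc, where Matcher::misaligned_vectors_ok() is false, that access would fault, which is exactly the case the new vw > vw_best check rejects.

#include <cstdio>

int main() {
  const int vector_bytes  = 16;                            // assumed machine vector width
  const int best_elt_size = 4;                             // alignment reference: int elements
  const int cand_elt_size = 1;                             // candidate pack: byte elements
  const int v_align_best  = vector_bytes / best_elt_size;  // 4 elements per vector
  const int v_align_cand  = vector_bytes / cand_elt_size;  // 16 elements per vector

  // The pre-loop runs just enough iterations to make the loop index a
  // multiple of v_align_best; check what that leaves for the candidate.
  for (int init = 0; init < 8; init++) {
    int pre_iters  = (v_align_best - init % v_align_best) % v_align_best;
    int main_start = init + pre_iters;                     // index entering the main loop
    printf("init=%d pre_iters=%d main_start=%d best ok=%d cand ok=%d\n",
           init, pre_iters, main_start,
           main_start % v_align_best == 0,                 // always 1
           main_start % v_align_cand == 0);                // usually 0 -> misaligned vector
  }
  return 0;
}

On x86 misaligned vector accesses are allowed, so such packs are still created there; the guard only disables the wider pack on targets that require aligned vector memory accesses.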