# HG changeset patch
# User Tom Rodriguez
# Date 1429075207 25200
# Node ID a560c9b81f0f9c63ce366d45657be046aebdfcaf
# Parent  534f0dde281048061acb03abed2761cc7a8002cd
Add support for oops in vector registers at safepoints

diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/registerMap_x86.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/registerMap_x86.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "vmreg_x86.inline.hpp"
+
+address RegisterMap::pd_location(VMReg reg) const {
+  if (reg->is_XMMRegister()) {
+    int regBase = reg->value() - ConcreteRegisterImpl::max_fpr;
+    if (regBase % 4 == 0) {
+      // Reads of the low and high 16 byte parts should be handled by location itself
+      return NULL;
+    }
+    VMReg baseReg = as_XMMRegister(regBase >> 3)->as_VMReg();
+    intptr_t offset = (reg->value() - baseReg->value()) * 4;
+    if (offset >= 16) {
+      // The high halves of the YMM registers are saved in their own area of the frame
+      baseReg = baseReg->next()->next()->next()->next();
+      offset -= 16;
+    }
+    address baseLocation = location(baseReg);
+    if (baseLocation != NULL) {
+      return baseLocation + offset;
+    }
+  }
+  return NULL;
+}
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/registerMap_x86.hpp
--- a/src/cpu/x86/vm/registerMap_x86.hpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/registerMap_x86.hpp	Tue Apr 14 22:20:07 2015 -0700
@@ -31,11 +31,7 @@
  private:
   // This is the hook for finding a register in an "well-known" location,
   // such as a register block of a predetermined format.
-  // Since there is none, we just return NULL.
-  // See registerMap_sparc.hpp for an example of grabbing registers
-  // from register save areas of a standard layout.
-  address pd_location(VMReg reg) const {return NULL;}
-
+  address pd_location(VMReg reg) const;
   // no PD state to clear or copy:
   void pd_clear() {}
   void pd_initialize() {}
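
Note: the slot arithmetic in pd_location is easier to follow with a worked example. Each XMM register covers eight 4-byte VMReg slots; slot 0 (the low 16 bytes) and slot 4 (the base of the YMM high half) have their own callee-saved entries and are resolved by location() itself, while every other slot becomes a byte offset from one of those two entries. The standalone sketch below is not part of the patch — resolve_slot is a hypothetical helper that mirrors the same arithmetic, assuming 8 slots per register and 4 bytes per slot as the name table added to register_x86.cpp further down implies:

    #include <cassert>
    #include <cstdio>

    // Given a sub-word slot index relative to xmm0:0, compute which XMM
    // register it belongs to, whether it falls in the YMM high save area,
    // and the byte offset within that saved 16-byte chunk.
    static void resolve_slot(int slot, int* reg, bool* high, int* byte_off) {
      assert(slot % 4 != 0);            // slots 0 and 4 are handled by location() itself
      *reg = slot >> 3;                 // 8 VMReg slots per XMM register
      int offset = (slot & 7) * 4;      // 4 bytes per VMReg slot
      *high = (offset >= 16);           // bytes 16..31 live in the YMM high area
      *byte_off = *high ? offset - 16 : offset;
    }

    int main() {
      int reg, off; bool high;
      resolve_slot(3 * 8 + 2, &reg, &high, &off);             // xmm3:2
      printf("xmm%d %s+%d\n", reg, high ? "hi" : "lo", off);  // prints: xmm3 lo+8
      resolve_slot(3 * 8 + 5, &reg, &high, &off);             // xmm3:5
      printf("xmm%d %s+%d\n", reg, high ? "hi" : "lo", off);  // prints: xmm3 hi+4
      return 0;
    }
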
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/register_x86.cpp
--- a/src/cpu/x86/vm/register_x86.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/register_x86.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -64,3 +64,28 @@
   };
   return is_valid() ? names[encoding()] : "xnoreg";
 }
+
+const char* XMMRegisterImpl::sub_word_name(int i) const {
+  const char* names[number_of_registers * 8] = {
+    "xmm0:0", "xmm0:1", "xmm0:2", "xmm0:3", "xmm0:4", "xmm0:5", "xmm0:6", "xmm0:7",
+    "xmm1:0", "xmm1:1", "xmm1:2", "xmm1:3", "xmm1:4", "xmm1:5", "xmm1:6", "xmm1:7",
+    "xmm2:0", "xmm2:1", "xmm2:2", "xmm2:3", "xmm2:4", "xmm2:5", "xmm2:6", "xmm2:7",
+    "xmm3:0", "xmm3:1", "xmm3:2", "xmm3:3", "xmm3:4", "xmm3:5", "xmm3:6", "xmm3:7",
+    "xmm4:0", "xmm4:1", "xmm4:2", "xmm4:3", "xmm4:4", "xmm4:5", "xmm4:6", "xmm4:7",
+    "xmm5:0", "xmm5:1", "xmm5:2", "xmm5:3", "xmm5:4", "xmm5:5", "xmm5:6", "xmm5:7",
+    "xmm6:0", "xmm6:1", "xmm6:2", "xmm6:3", "xmm6:4", "xmm6:5", "xmm6:6", "xmm6:7",
+    "xmm7:0", "xmm7:1", "xmm7:2", "xmm7:3", "xmm7:4", "xmm7:5", "xmm7:6", "xmm7:7",
+#ifdef AMD64
+    "xmm8:0", "xmm8:1", "xmm8:2", "xmm8:3", "xmm8:4", "xmm8:5", "xmm8:6", "xmm8:7",
+    "xmm9:0", "xmm9:1", "xmm9:2", "xmm9:3", "xmm9:4", "xmm9:5", "xmm9:6", "xmm9:7",
+    "xmm10:0", "xmm10:1", "xmm10:2", "xmm10:3", "xmm10:4", "xmm10:5", "xmm10:6", "xmm10:7",
+    "xmm11:0", "xmm11:1", "xmm11:2", "xmm11:3", "xmm11:4", "xmm11:5", "xmm11:6", "xmm11:7",
+    "xmm12:0", "xmm12:1", "xmm12:2", "xmm12:3", "xmm12:4", "xmm12:5", "xmm12:6", "xmm12:7",
+    "xmm13:0", "xmm13:1", "xmm13:2", "xmm13:3", "xmm13:4", "xmm13:5", "xmm13:6", "xmm13:7",
+    "xmm14:0", "xmm14:1", "xmm14:2", "xmm14:3", "xmm14:4", "xmm14:5", "xmm14:6", "xmm14:7",
+    "xmm15:0", "xmm15:1", "xmm15:2", "xmm15:3", "xmm15:4", "xmm15:5", "xmm15:6", "xmm15:7",
+#endif // AMD64
+  };
+  assert(i >= 0 && i < 8, "offset too large");
+  return is_valid() ? names[encoding() * 8 + i] : "xnoreg";
+}
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/register_x86.hpp
--- a/src/cpu/x86/vm/register_x86.hpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/register_x86.hpp	Tue Apr 14 22:20:07 2015 -0700
@@ -161,6 +161,7 @@
   int   encoding() const { assert(is_valid(), err_msg("invalid register (%d)", (int)(intptr_t)this )); return (intptr_t)this; }
   bool  is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
   const char* name() const;
+  const char* sub_word_name(int offset) const;
 };

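
Note: together with the vmreg_x86.cpp hunk further down, this gives each XMM register eight printable VMReg names — the plain register name for slot 0 and "xmmN:i" for slots 1 through 7 — via the flat index encoding() * 8 + i. A hypothetical sketch of that naming scheme (not part of the patch):

    #include <cassert>
    #include <cstdio>

    // Mirror of the naming scheme: the flat table in sub_word_name is indexed
    // as encoding() * 8 + i, and vmreg_x86.cpp keeps the plain name for slot 0.
    static void print_slot_name(int encoding, int i) {
      assert(0 <= i && i < 8);
      if (i == 0) {
        printf("xmm%d\n", encoding);        // what xreg->name() returns
      } else {
        printf("xmm%d:%d\n", encoding, i);  // what xreg->sub_word_name(i) returns
      }
    }

    int main() {
      for (int i = 0; i < 8; i++) {
        print_slot_name(3, i);  // xmm3, xmm3:1, xmm3:2, ..., xmm3:7
      }
      return 0;
    }
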
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -70,9 +70,32 @@
 // Capture info about frame layout.  Layout offsets are in jint
 // units because compiler frame slots are jints.
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt
 enum layout {
   fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-  xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
+#if defined(COMPILER2) || defined(GRAAL)
+  ymm_off       = fpu_state_off,                              // offset in fxsave save area
+  DEF_YMM_HI_OFFS(0),
+  DEF_YMM_HI_OFFS(1),
+  DEF_YMM_HI_OFFS(2),
+  DEF_YMM_HI_OFFS(3),
+  DEF_YMM_HI_OFFS(4),
+  DEF_YMM_HI_OFFS(5),
+  DEF_YMM_HI_OFFS(6),
+  DEF_YMM_HI_OFFS(7),
+  DEF_YMM_HI_OFFS(8),
+  DEF_YMM_HI_OFFS(9),
+  DEF_YMM_HI_OFFS(10),
+  DEF_YMM_HI_OFFS(11),
+  DEF_YMM_HI_OFFS(12),
+  DEF_YMM_HI_OFFS(13),
+  DEF_YMM_HI_OFFS(14),
+  DEF_YMM_HI_OFFS(15),
+  ymm_hi_save_size = 16 * 16 / BytesPerInt,
+#else
+  ymm_hi_save_size = 0,
+#endif
+  xmm_off       = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size, // offset in fxsave save area
   DEF_XMM_OFFS(0),
   DEF_XMM_OFFS(1),
   DEF_XMM_OFFS(2),
@@ -89,7 +112,7 @@
   DEF_XMM_OFFS(13),
   DEF_XMM_OFFS(14),
   DEF_XMM_OFFS(15),
-  fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
+  fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size,
   fpu_stateH_end,
   r15_off, r15H_off,
   r14_off, r14H_off,
@@ -139,19 +162,6 @@
 };

 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-  int vect_words = 0;
-#ifdef COMPILER2
-  if (save_vectors) {
-    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    // Save upper half of YMM registes
-    vect_words = 16 * 16 / wordSize;
-    additional_frame_words += vect_words;
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2");
-#endif
-
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize + reg_save_size*BytesPerInt, 16);

@@ -172,26 +182,34 @@
   __ enter();              // rsp becomes 16-byte aligned here
   __ push_CPU_state();     // Push a multiple of 16 bytes

-  if (vect_words > 0) {
-    assert(vect_words*wordSize == 256, "");
-    __ subptr(rsp, 256); // Save upper half of YMM registes
-    __ vextractf128h(Address(rsp,  0),xmm0);
-    __ vextractf128h(Address(rsp, 16),xmm1);
-    __ vextractf128h(Address(rsp, 32),xmm2);
-    __ vextractf128h(Address(rsp, 48),xmm3);
-    __ vextractf128h(Address(rsp, 64),xmm4);
-    __ vextractf128h(Address(rsp, 80),xmm5);
-    __ vextractf128h(Address(rsp, 96),xmm6);
-    __ vextractf128h(Address(rsp,112),xmm7);
-    __ vextractf128h(Address(rsp,128),xmm8);
-    __ vextractf128h(Address(rsp,144),xmm9);
-    __ vextractf128h(Address(rsp,160),xmm10);
-    __ vextractf128h(Address(rsp,176),xmm11);
-    __ vextractf128h(Address(rsp,192),xmm12);
-    __ vextractf128h(Address(rsp,208),xmm13);
-    __ vextractf128h(Address(rsp,224),xmm14);
-    __ vextractf128h(Address(rsp,240),xmm15);
+#if defined(COMPILER2) || defined(GRAAL)
+  __ subptr(rsp, 256); // Save upper half of YMM registers
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+#ifdef COMPILER2
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+#endif
+    // Save upper half of YMM registers
+    __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0);
+    __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1);
+    __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2);
+    __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3);
+    __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4);
+    __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5);
+    __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6);
+    __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7);
+    __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8);
+    __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9);
+    __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10);
+    __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11);
+    __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12);
+    __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13);
+    __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14);
+    __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15);
   }
+#else
+  assert(!save_vectors, "vectors are generated only by C2 and Graal");
+#endif
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
@@ -240,6 +258,28 @@
   map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
   map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
+
+#if defined(COMPILER2) || defined(GRAAL)
+  if (save_vectors) {
+    map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next());
+  }
+#endif
+
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
@@ -283,31 +323,33 @@
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
-#ifdef COMPILER2
+#if defined(COMPILER2) || defined(GRAAL)
   if (restore_vectors) {
     // Restore upper half of YMM registes.
     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+#if defined(COMPILER2)
     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp,  0));
-    __ vinsertf128h(xmm1, Address(rsp, 16));
-    __ vinsertf128h(xmm2, Address(rsp, 32));
-    __ vinsertf128h(xmm3, Address(rsp, 48));
-    __ vinsertf128h(xmm4, Address(rsp, 64));
-    __ vinsertf128h(xmm5, Address(rsp, 80));
-    __ vinsertf128h(xmm6, Address(rsp, 96));
-    __ vinsertf128h(xmm7, Address(rsp,112));
-    __ vinsertf128h(xmm8, Address(rsp,128));
-    __ vinsertf128h(xmm9, Address(rsp,144));
-    __ vinsertf128h(xmm10, Address(rsp,160));
-    __ vinsertf128h(xmm11, Address(rsp,176));
-    __ vinsertf128h(xmm12, Address(rsp,192));
-    __ vinsertf128h(xmm13, Address(rsp,208));
-    __ vinsertf128h(xmm14, Address(rsp,224));
-    __ vinsertf128h(xmm15, Address(rsp,240));
-    __ addptr(rsp, 256);
+#endif
+    __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt));
+    __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt));
+    __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt));
+    __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt));
+    __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt));
+    __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt));
+    __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt));
+    __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt));
+    __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt));
+    __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt));
+    __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt));
+    __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt));
+    __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt));
+    __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt));
+    __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt));
+    __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt));
   }
+  __ addptr(rsp, 256);
 #else
-  assert(!restore_vectors, "vectors are generated only by C2");
+  assert(!restore_vectors, "vectors are generated only by C2 and Graal");
 #endif
   // Recover CPU state
   __ pop_CPU_state();
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/vmreg_x86.cpp
--- a/src/cpu/x86/vm/vmreg_x86.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/vmreg_x86.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -48,8 +48,9 @@

   XMMRegister xreg = ::as_XMMRegister(0);
   for ( ; i < ConcreteRegisterImpl::max_xmm ; ) {
-    for (int j = 0 ; j < 8 ; j++) {
-      regName[i++] = xreg->name();
+    regName[i++] = xreg->name();
+    for (int j = 1 ; j < 8 ; j++) {
+      regName[i++] = xreg->sub_word_name(j);
     }
     xreg = xreg->successor();
   }
diff -r 534f0dde2810 -r a560c9b81f0f src/share/vm/graal/graalEnv.cpp
--- a/src/share/vm/graal/graalEnv.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/share/vm/graal/graalEnv.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -546,6 +546,12 @@
       }
     } else {
       nm->set_has_unsafe_access(has_unsafe_access);
+#ifdef TARGET_ARCH_x86
+      // It might be preferable to set this only for methods which
+      // use vector instructions but we currently don't track this
+      // and it probably wouldn't make much difference.
+      nm->set_has_wide_vectors(UseAVX >= 2);
+#endif

       // Record successful registration.
       // (Put nm into the task handle *before* publishing to the Java heap.)
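
Note: a quick sanity check of the layout arithmetic from the sharedRuntime_x86_64.cpp hunks above. Offsets in the layout enum are in jint (4-byte) slots, so DEF_YMM_HI_OFFS(n) places register n's high half n*16 bytes into the new save area, the sixteen 16-byte halves fill exactly the 256 bytes reserved by __ subptr(rsp, 256), and the four next() calls in the OopMap entries step the base VMReg by 4 slots = 16 bytes — precisely the step pd_location undoes. A self-contained check of that arithmetic (assumed constants, not part of the patch):

    #include <cassert>

    int main() {
      const int BytesPerInt = 4;      // layout offsets are in jint slots
      const int ymm_off = 0;          // assume the YMM high area starts the save block
      for (int n = 0; n < 16; n++) {
        int ymm_hi_off = ymm_off + n * 16 / BytesPerInt;  // DEF_YMM_HI_OFFS(n)
        assert(ymm_hi_off * BytesPerInt == n * 16);       // byte offset fed to vextractf128h
      }
      const int ymm_hi_save_size = 16 * 16 / BytesPerInt; // 64 jint slots
      assert(ymm_hi_save_size * BytesPerInt == 256);      // matches __ subptr(rsp, 256)
      return 0;
    }
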
diff -r 534f0dde2810 -r a560c9b81f0f src/share/vm/runtime/sharedRuntime.cpp
--- a/src/share/vm/runtime/sharedRuntime.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -111,9 +111,14 @@
   _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C), "resolve_virtual_call");
   _resolve_static_call_blob  = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),  "resolve_static_call");

+#if defined(COMPILER2) || defined(GRAAL)
+  // Vectors are generated only by C2 and Graal.
 #ifdef COMPILER2
-  // Vectors are generated only by C2.
-  if (is_wide_vector(MaxVectorSize)) {
+  bool support_wide = is_wide_vector(MaxVectorSize) || IS_GRAAL_DEFINED;
+#else
+  bool support_wide = true;
+#endif
+  if (support_wide) {
     _polling_page_vectors_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_VECTOR_LOOP);
   }
 #endif // COMPILER2
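
Note: the net effect of this last hunk is that the vector-saving safepoint handler blob, which C2 previously generated only when MaxVectorSize was wide, is now also generated whenever Graal is present, since Graal-compiled nmethods can be flagged has_wide_vectors (see the graalEnv.cpp hunk) independently of C2's settings. A condensed sketch of the predicate — the 16-byte is_wide_vector cutoff below is an assumption, not something this patch defines:

    #include <cassert>

    // Hypothetical condensation of the blob-selection logic above.
    static bool need_vector_safepoint_blob(bool graal_present, int max_vector_size) {
      const int assumed_wide_cutoff = 16;  // assumed is_wide_vector threshold on x86_64
      return graal_present || max_vector_size > assumed_wide_cutoff;
    }

    int main() {
      assert(need_vector_safepoint_blob(true, 16));    // Graal: always generated
      assert(need_vector_safepoint_blob(false, 32));   // C2 with 256-bit vectors
      assert(!need_vector_safepoint_blob(false, 16));  // C2 with 128-bit vectors only
      return 0;
    }
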