# HG changeset patch
# User Tom Rodriguez
# Date 1429075207 25200
# Node ID a560c9b81f0f9c63ce366d45657be046aebdfcaf
# Parent  534f0dde281048061acb03abed2761cc7a8002cd
Add support for oops in vector registers at safepoints

diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/registerMap_x86.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/registerMap_x86.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "vmreg_x86.inline.hpp"
+
+address RegisterMap::pd_location(VMReg reg) const {
+  if (reg->is_XMMRegister()) {
+    int regBase = reg->value() - ConcreteRegisterImpl::max_fpr;
+    if (regBase % 4 == 0) {
+      // Reads of the low and high 16 byte parts should be handled by location itself
+      return NULL;
+    }
+    VMReg baseReg = as_XMMRegister(regBase >> 3)->as_VMReg();
+    intptr_t offset = (reg->value() - baseReg->value()) * 4;
+    if (offset >= 16) {
+      // The high halves of the YMM registers are saved in their own area of the frame
+      baseReg = baseReg->next()->next()->next()->next();
+      offset -= 16;
+    }
+    address baseLocation = location(baseReg);
+    if (baseLocation != NULL) {
+      return baseLocation + offset;
+    }
+  }
+  return NULL;
+}
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/registerMap_x86.hpp
--- a/src/cpu/x86/vm/registerMap_x86.hpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/registerMap_x86.hpp	Tue Apr 14 22:20:07 2015 -0700
@@ -31,11 +31,7 @@
  private:
   // This is the hook for finding a register in an "well-known" location,
   // such as a register block of a predetermined format.
-  // Since there is none, we just return NULL.
-  // See registerMap_sparc.hpp for an example of grabbing registers
-  // from register save areas of a standard layout.
-  address pd_location(VMReg reg) const {return NULL;}
-
+  address pd_location(VMReg reg) const;
   // no PD state to clear or copy:
   void pd_clear() {}
   void pd_initialize() {}
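
Note: the slot arithmetic in pd_location is easier to follow with a worked example. Each XMM register covers eight 4-byte VMReg slots; slot 0 (the low 16 bytes) and slot 4 (the base of the YMM high half) have their own callee-saved entries and are resolved by location() itself, while every other slot becomes a byte offset from one of those two entries. The standalone sketch below is not part of the patch — resolve_slot is a hypothetical helper that mirrors the same arithmetic, assuming 8 slots per register and 4 bytes per slot as the name table added to register_x86.cpp further down implies:

    #include <cassert>
    #include <cstdio>

    // Given a sub-word slot index relative to xmm0:0, compute which XMM
    // register it belongs to, whether it falls in the YMM high save area,
    // and the byte offset within that saved 16-byte chunk.
    static void resolve_slot(int slot, int* reg, bool* high, int* byte_off) {
      assert(slot % 4 != 0);            // slots 0 and 4 are handled by location() itself
      *reg = slot >> 3;                 // 8 VMReg slots per XMM register
      int offset = (slot & 7) * 4;      // 4 bytes per VMReg slot
      *high = (offset >= 16);           // bytes 16..31 live in the YMM high area
      *byte_off = *high ? offset - 16 : offset;
    }

    int main() {
      int reg, off; bool high;
      resolve_slot(3 * 8 + 2, &reg, &high, &off);             // xmm3:2
      printf("xmm%d %s+%d\n", reg, high ? "hi" : "lo", off);  // prints: xmm3 lo+8
      resolve_slot(3 * 8 + 5, &reg, &high, &off);             // xmm3:5
      printf("xmm%d %s+%d\n", reg, high ? "hi" : "lo", off);  // prints: xmm3 hi+4
      return 0;
    }
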
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/register_x86.cpp
--- a/src/cpu/x86/vm/register_x86.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/register_x86.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -64,3 +64,28 @@
   };
   return is_valid() ? names[encoding()] : "xnoreg";
 }
+
+const char* XMMRegisterImpl::sub_word_name(int i) const {
+  const char* names[number_of_registers * 8] = {
+    "xmm0:0", "xmm0:1", "xmm0:2", "xmm0:3", "xmm0:4", "xmm0:5", "xmm0:6", "xmm0:7",
+    "xmm1:0", "xmm1:1", "xmm1:2", "xmm1:3", "xmm1:4", "xmm1:5", "xmm1:6", "xmm1:7",
+    "xmm2:0", "xmm2:1", "xmm2:2", "xmm2:3", "xmm2:4", "xmm2:5", "xmm2:6", "xmm2:7",
+    "xmm3:0", "xmm3:1", "xmm3:2", "xmm3:3", "xmm3:4", "xmm3:5", "xmm3:6", "xmm3:7",
+    "xmm4:0", "xmm4:1", "xmm4:2", "xmm4:3", "xmm4:4", "xmm4:5", "xmm4:6", "xmm4:7",
+    "xmm5:0", "xmm5:1", "xmm5:2", "xmm5:3", "xmm5:4", "xmm5:5", "xmm5:6", "xmm5:7",
+    "xmm6:0", "xmm6:1", "xmm6:2", "xmm6:3", "xmm6:4", "xmm6:5", "xmm6:6", "xmm6:7",
+    "xmm7:0", "xmm7:1", "xmm7:2", "xmm7:3", "xmm7:4", "xmm7:5", "xmm7:6", "xmm7:7",
+#ifdef AMD64
+    "xmm8:0", "xmm8:1", "xmm8:2", "xmm8:3", "xmm8:4", "xmm8:5", "xmm8:6", "xmm8:7",
+    "xmm9:0", "xmm9:1", "xmm9:2", "xmm9:3", "xmm9:4", "xmm9:5", "xmm9:6", "xmm9:7",
+    "xmm10:0", "xmm10:1", "xmm10:2", "xmm10:3", "xmm10:4", "xmm10:5", "xmm10:6", "xmm10:7",
+    "xmm11:0", "xmm11:1", "xmm11:2", "xmm11:3", "xmm11:4", "xmm11:5", "xmm11:6", "xmm11:7",
+    "xmm12:0", "xmm12:1", "xmm12:2", "xmm12:3", "xmm12:4", "xmm12:5", "xmm12:6", "xmm12:7",
+    "xmm13:0", "xmm13:1", "xmm13:2", "xmm13:3", "xmm13:4", "xmm13:5", "xmm13:6", "xmm13:7",
+    "xmm14:0", "xmm14:1", "xmm14:2", "xmm14:3", "xmm14:4", "xmm14:5", "xmm14:6", "xmm14:7",
+    "xmm15:0", "xmm15:1", "xmm15:2", "xmm15:3", "xmm15:4", "xmm15:5", "xmm15:6", "xmm15:7",
+#endif // AMD64
+  };
+  assert(i >= 0 && i < 8, "offset too large");
+  return is_valid() ? names[encoding() * 8 + i] : "xnoreg";
+}
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/register_x86.hpp
--- a/src/cpu/x86/vm/register_x86.hpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/register_x86.hpp	Tue Apr 14 22:20:07 2015 -0700
@@ -161,6 +161,7 @@
   int   encoding() const { assert(is_valid(), err_msg("invalid register (%d)", (int)(intptr_t)this )); return (intptr_t)this; }
   bool  is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
   const char* name() const;
+  const char* sub_word_name(int offset) const;
 };

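
Note: together with the vmreg_x86.cpp hunk further down, this gives each XMM register eight printable VMReg names — the plain register name for slot 0 and "xmmN:i" for slots 1 through 7 — via the flat index encoding() * 8 + i. A hypothetical sketch of that naming scheme (not part of the patch):

    #include <cassert>
    #include <cstdio>

    // Mirror of the naming scheme: the flat table in sub_word_name is indexed
    // as encoding() * 8 + i, and vmreg_x86.cpp keeps the plain name for slot 0.
    static void print_slot_name(int encoding, int i) {
      assert(0 <= i && i < 8);
      if (i == 0) {
        printf("xmm%d\n", encoding);        // what xreg->name() returns
      } else {
        printf("xmm%d:%d\n", encoding, i);  // what xreg->sub_word_name(i) returns
      }
    }

    int main() {
      for (int i = 0; i < 8; i++) {
        print_slot_name(3, i);  // xmm3, xmm3:1, xmm3:2, ..., xmm3:7
      }
      return 0;
    }
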
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -70,9 +70,32 @@
 // Capture info about frame layout.  Layout offsets are in jint
 // units because compiler frame slots are jints.
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt
 enum layout {
   fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-  xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
+#if defined(COMPILER2) || defined(GRAAL)
+  ymm_off       = fpu_state_off,                              // offset in fxsave save area
+  DEF_YMM_HI_OFFS(0),
+  DEF_YMM_HI_OFFS(1),
+  DEF_YMM_HI_OFFS(2),
+  DEF_YMM_HI_OFFS(3),
+  DEF_YMM_HI_OFFS(4),
+  DEF_YMM_HI_OFFS(5),
+  DEF_YMM_HI_OFFS(6),
+  DEF_YMM_HI_OFFS(7),
+  DEF_YMM_HI_OFFS(8),
+  DEF_YMM_HI_OFFS(9),
+  DEF_YMM_HI_OFFS(10),
+  DEF_YMM_HI_OFFS(11),
+  DEF_YMM_HI_OFFS(12),
+  DEF_YMM_HI_OFFS(13),
+  DEF_YMM_HI_OFFS(14),
+  DEF_YMM_HI_OFFS(15),
+  ymm_hi_save_size = 16 * 16 / BytesPerInt,
+#else
+  ymm_hi_save_size = 0,
+#endif
+  xmm_off       = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size, // offset in fxsave save area
   DEF_XMM_OFFS(0),
   DEF_XMM_OFFS(1),
   DEF_XMM_OFFS(2),
@@ -89,7 +112,7 @@
   DEF_XMM_OFFS(13),
   DEF_XMM_OFFS(14),
   DEF_XMM_OFFS(15),
-  fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
+  fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size,
   fpu_stateH_end,
   r15_off, r15H_off,
   r14_off, r14H_off,
@@ -139,19 +162,6 @@
 };

 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-  int vect_words = 0;
-#ifdef COMPILER2
-  if (save_vectors) {
-    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    // Save upper half of YMM registes
-    vect_words = 16 * 16 / wordSize;
-    additional_frame_words += vect_words;
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2");
-#endif
-
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize + reg_save_size*BytesPerInt, 16);

@@ -172,26 +182,34 @@
   __ enter();              // rsp becomes 16-byte aligned here
   __ push_CPU_state();     // Push a multiple of 16 bytes

-  if (vect_words > 0) {
-    assert(vect_words*wordSize == 256, "");
-    __ subptr(rsp, 256); // Save upper half of YMM registes
-    __ vextractf128h(Address(rsp,  0),xmm0);
-    __ vextractf128h(Address(rsp, 16),xmm1);
-    __ vextractf128h(Address(rsp, 32),xmm2);
-    __ vextractf128h(Address(rsp, 48),xmm3);
-    __ vextractf128h(Address(rsp, 64),xmm4);
-    __ vextractf128h(Address(rsp, 80),xmm5);
-    __ vextractf128h(Address(rsp, 96),xmm6);
-    __ vextractf128h(Address(rsp,112),xmm7);
-    __ vextractf128h(Address(rsp,128),xmm8);
-    __ vextractf128h(Address(rsp,144),xmm9);
-    __ vextractf128h(Address(rsp,160),xmm10);
-    __ vextractf128h(Address(rsp,176),xmm11);
-    __ vextractf128h(Address(rsp,192),xmm12);
-    __ vextractf128h(Address(rsp,208),xmm13);
-    __ vextractf128h(Address(rsp,224),xmm14);
-    __ vextractf128h(Address(rsp,240),xmm15);
+#if defined(COMPILER2) || defined(GRAAL)
+  __ subptr(rsp, 256); // Save upper half of YMM registers
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+#ifdef COMPILER2
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+#endif
+    // Save upper half of YMM registers
+    __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0);
+    __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1);
+    __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2);
+    __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3);
+    __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4);
+    __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5);
+    __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6);
+    __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7);
+    __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8);
+    __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9);
+    __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10);
+    __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11);
+    __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12);
+    __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13);
+    __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14);
+    __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15);
   }
+#else
+  assert(!save_vectors, "vectors are generated only by C2 and Graal");
+#endif
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
@@ -240,6 +258,28 @@
   map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
   map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
+
+#if defined(COMPILER2) || defined(GRAAL)
+  if (save_vectors) {
+    map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next());
+  }
+#endif
+
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
@@ -283,31 +323,33 @@
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
-#ifdef COMPILER2
+#if defined(COMPILER2) || defined(GRAAL)
   if (restore_vectors) {
     // Restore upper half of YMM registes.
     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+#if defined(COMPILER2)
     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp,  0));
-    __ vinsertf128h(xmm1, Address(rsp, 16));
-    __ vinsertf128h(xmm2, Address(rsp, 32));
-    __ vinsertf128h(xmm3, Address(rsp, 48));
-    __ vinsertf128h(xmm4, Address(rsp, 64));
-    __ vinsertf128h(xmm5, Address(rsp, 80));
-    __ vinsertf128h(xmm6, Address(rsp, 96));
-    __ vinsertf128h(xmm7, Address(rsp,112));
-    __ vinsertf128h(xmm8, Address(rsp,128));
-    __ vinsertf128h(xmm9, Address(rsp,144));
-    __ vinsertf128h(xmm10, Address(rsp,160));
-    __ vinsertf128h(xmm11, Address(rsp,176));
-    __ vinsertf128h(xmm12, Address(rsp,192));
-    __ vinsertf128h(xmm13, Address(rsp,208));
-    __ vinsertf128h(xmm14, Address(rsp,224));
-    __ vinsertf128h(xmm15, Address(rsp,240));
-    __ addptr(rsp, 256);
+#endif
+    __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt));
+    __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt));
+    __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt));
+    __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt));
+    __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt));
+    __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt));
+    __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt));
+    __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt));
+    __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt));
+    __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt));
+    __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt));
+    __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt));
+    __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt));
+    __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt));
+    __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt));
+    __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt));
   }
+  __ addptr(rsp, 256);
 #else
-  assert(!restore_vectors, "vectors are generated only by C2");
+  assert(!restore_vectors, "vectors are generated only by C2 and Graal");
 #endif
   // Recover CPU state
   __ pop_CPU_state();
diff -r 534f0dde2810 -r a560c9b81f0f src/cpu/x86/vm/vmreg_x86.cpp
--- a/src/cpu/x86/vm/vmreg_x86.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/cpu/x86/vm/vmreg_x86.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -48,8 +48,9 @@

   XMMRegister xreg = ::as_XMMRegister(0);
   for ( ; i < ConcreteRegisterImpl::max_xmm ; ) {
-    for (int j = 0 ; j < 8 ; j++) {
-      regName[i++] = xreg->name();
+    regName[i++] = xreg->name();
+    for (int j = 1 ; j < 8 ; j++) {
+      regName[i++] = xreg->sub_word_name(j);
     }
     xreg = xreg->successor();
   }
diff -r 534f0dde2810 -r a560c9b81f0f src/share/vm/graal/graalEnv.cpp
--- a/src/share/vm/graal/graalEnv.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/share/vm/graal/graalEnv.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -546,6 +546,12 @@
       }
     } else {
       nm->set_has_unsafe_access(has_unsafe_access);
+#ifdef TARGET_ARCH_x86
+      // It might be preferable to set this only for methods which
+      // use vector instructions but we currently don't track this
+      // and it probably wouldn't make much difference.
+      nm->set_has_wide_vectors(UseAVX >= 2);
+#endif

       // Record successful registration.
       // (Put nm into the task handle *before* publishing to the Java heap.)
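
Note: a quick sanity check of the layout arithmetic from the sharedRuntime_x86_64.cpp hunks above. Offsets in the layout enum are in jint (4-byte) slots, so DEF_YMM_HI_OFFS(n) places register n's high half n*16 bytes into the new save area, the sixteen 16-byte halves fill exactly the 256 bytes reserved by __ subptr(rsp, 256), and the four next() calls in the OopMap entries step the base VMReg by 4 slots = 16 bytes — precisely the step pd_location undoes. A self-contained check of that arithmetic (assumed constants, not part of the patch):

    #include <cassert>

    int main() {
      const int BytesPerInt = 4;      // layout offsets are in jint slots
      const int ymm_off = 0;          // assume the YMM high area starts the save block
      for (int n = 0; n < 16; n++) {
        int ymm_hi_off = ymm_off + n * 16 / BytesPerInt;  // DEF_YMM_HI_OFFS(n)
        assert(ymm_hi_off * BytesPerInt == n * 16);       // byte offset fed to vextractf128h
      }
      const int ymm_hi_save_size = 16 * 16 / BytesPerInt; // 64 jint slots
      assert(ymm_hi_save_size * BytesPerInt == 256);      // matches __ subptr(rsp, 256)
      return 0;
    }
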
diff -r 534f0dde2810 -r a560c9b81f0f src/share/vm/runtime/sharedRuntime.cpp
--- a/src/share/vm/runtime/sharedRuntime.cpp	Tue Apr 14 17:26:29 2015 -0700
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Tue Apr 14 22:20:07 2015 -0700
@@ -111,9 +111,14 @@
   _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C), "resolve_virtual_call");
   _resolve_static_call_blob  = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),  "resolve_static_call");

+#if defined(COMPILER2) || defined(GRAAL)
+  // Vectors are generated only by C2 and Graal.
 #ifdef COMPILER2
-  // Vectors are generated only by C2.
-  if (is_wide_vector(MaxVectorSize)) {
+  bool support_wide = is_wide_vector(MaxVectorSize) || IS_GRAAL_DEFINED;
+#else
+  bool support_wide = true;
+#endif
+  if (support_wide) {
     _polling_page_vectors_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_VECTOR_LOOP);
   }
 #endif // COMPILER2
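
Note: the net effect of this last hunk is that the vector-saving safepoint handler blob, which C2 previously generated only when MaxVectorSize was wide, is now also generated whenever Graal is present, since Graal-compiled nmethods can be flagged has_wide_vectors (see the graalEnv.cpp hunk) independently of C2's settings. A condensed sketch of the predicate — the 16-byte is_wide_vector cutoff below is an assumption, not something this patch defines:

    #include <cassert>

    // Hypothetical condensation of the blob-selection logic above.
    static bool need_vector_safepoint_blob(bool graal_present, int max_vector_size) {
      const int assumed_wide_cutoff = 16;  // assumed is_wide_vector threshold on x86_64
      return graal_present || max_vector_size > assumed_wide_cutoff;
    }

    int main() {
      assert(need_vector_safepoint_blob(true, 16));    // Graal: always generated
      assert(need_vector_safepoint_blob(false, 32));   // C2 with 256-bit vectors
      assert(!need_vector_safepoint_blob(false, 16));  // C2 with 128-bit vectors only
      return 0;
    }
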