# HG changeset patch # User Thomas Wuerthinger # Date 1433596378 -7200 # Node ID 0dfd3ea90d33fe33772dbeafcbbec340e11c439b # Parent 75daca0c6a0f017d855f7e614d82da3f79372600 Undo changes around saving of ymm registers. diff -r 75daca0c6a0f -r 0dfd3ea90d33 src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Fri Jun 05 23:54:02 2015 +0200 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Sat Jun 06 15:12:58 2015 +0200 @@ -70,9 +70,32 @@ // Capture info about frame layout. Layout offsets are in jint // units because compiler frame slots are jints. #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off +#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt enum layout { fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area - xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area +#if defined(COMPILER2) || defined(JVMCI) + ymm_off = fpu_state_off, // offset in fxsave save area + DEF_YMM_HI_OFFS(0), + DEF_YMM_HI_OFFS(1), + DEF_YMM_HI_OFFS(2), + DEF_YMM_HI_OFFS(3), + DEF_YMM_HI_OFFS(4), + DEF_YMM_HI_OFFS(5), + DEF_YMM_HI_OFFS(6), + DEF_YMM_HI_OFFS(7), + DEF_YMM_HI_OFFS(8), + DEF_YMM_HI_OFFS(9), + DEF_YMM_HI_OFFS(10), + DEF_YMM_HI_OFFS(11), + DEF_YMM_HI_OFFS(12), + DEF_YMM_HI_OFFS(13), + DEF_YMM_HI_OFFS(14), + DEF_YMM_HI_OFFS(15), + ymm_hi_save_size = 16 * 16 / BytesPerInt, +#else + ymm_hi_save_size = 0, +#endif + xmm_off = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size, // offset in fxsave save area DEF_XMM_OFFS(0), DEF_XMM_OFFS(1), DEF_XMM_OFFS(2), @@ -89,7 +112,7 @@ DEF_XMM_OFFS(13), DEF_XMM_OFFS(14), DEF_XMM_OFFS(15), - fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt), + fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size, fpu_stateH_end, r15_off, r15H_off, r14_off, r14H_off, @@ -139,21 +162,6 @@ }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { - int vect_words = 0; - int ymmhi_offset = -1; -#if defined(COMPILER2) || defined(JVMCI) - if (save_vectors) { - assert(UseAVX > 0, "256bit vectors are supported only with AVX"); - assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); - // Save upper half of YMM registes - vect_words = 16 * 16 / wordSize; - ymmhi_offset = additional_frame_words; - additional_frame_words += vect_words; - } -#else - assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); -#endif - // Always make the frame size 16-byte aligned int frame_size_in_bytes = round_to(additional_frame_words*wordSize + reg_save_size*BytesPerInt, 16); @@ -174,26 +182,32 @@ __ enter(); // rsp becomes 16-byte aligned here __ push_CPU_state(); // Push a multiple of 16 bytes - if (vect_words > 0) { - assert(vect_words*wordSize == 256, ""); - __ subptr(rsp, 256); // Save upper half of YMM registes - __ vextractf128h(Address(rsp, 0),xmm0); - __ vextractf128h(Address(rsp, 16),xmm1); - __ vextractf128h(Address(rsp, 32),xmm2); - __ vextractf128h(Address(rsp, 48),xmm3); - __ vextractf128h(Address(rsp, 64),xmm4); - __ vextractf128h(Address(rsp, 80),xmm5); - __ vextractf128h(Address(rsp, 96),xmm6); - __ vextractf128h(Address(rsp,112),xmm7); - __ vextractf128h(Address(rsp,128),xmm8); - __ vextractf128h(Address(rsp,144),xmm9); - __ vextractf128h(Address(rsp,160),xmm10); - __ vextractf128h(Address(rsp,176),xmm11); - __ vextractf128h(Address(rsp,192),xmm12); - __ vextractf128h(Address(rsp,208),xmm13); - __ vextractf128h(Address(rsp,224),xmm14); - __ vextractf128h(Address(rsp,240),xmm15); +#if defined(COMPILER2) || defined(JVMCI) + __ subptr(rsp, 256); // Save upper half of YMM registers + if (save_vectors) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + // Save upper half of YMM registers + __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0); + __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1); + __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2); + __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3); + __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4); + __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5); + __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6); + __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7); + __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8); + __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9); + __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10); + __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11); + __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12); + __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13); + __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14); + __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15); } +#else + assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); +#endif if (frame::arg_reg_save_area_bytes != 0) { // Allocate argument register save area __ subptr(rsp, frame::arg_reg_save_area_bytes); @@ -208,7 +222,6 @@ OopMap* map = new OopMap(frame_size_in_slots, 0); #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) -#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x) + ymmhi_offset) map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); @@ -246,23 +259,22 @@ #if defined(COMPILER2) || defined(JVMCI) if (save_vectors) { - assert(ymmhi_offset != -1, "save area must exist"); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm0_off ), xmm0->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm1_off ), xmm1->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm2_off ), xmm2->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm3_off ), xmm3->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm4_off ), xmm4->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm5_off ), xmm5->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm6_off ), xmm6->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm7_off ), xmm7->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm8_off ), xmm8->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm9_off ), xmm9->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm10_off), xmm10->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm11_off), xmm11->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm12_off), xmm12->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm13_off), xmm13->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm14_off), xmm14->as_VMReg()->next()->next()->next()->next()); - map->set_callee_saved(YMMHI_STACK_OFFSET(xmm15_off), xmm15->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next()); + map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next()); } #endif @@ -314,24 +326,24 @@ // Restore upper half of YMM registes. assert(UseAVX > 0, "256bit vectors are supported only with AVX"); assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); - __ vinsertf128h(xmm0, Address(rsp, 0)); - __ vinsertf128h(xmm1, Address(rsp, 16)); - __ vinsertf128h(xmm2, Address(rsp, 32)); - __ vinsertf128h(xmm3, Address(rsp, 48)); - __ vinsertf128h(xmm4, Address(rsp, 64)); - __ vinsertf128h(xmm5, Address(rsp, 80)); - __ vinsertf128h(xmm6, Address(rsp, 96)); - __ vinsertf128h(xmm7, Address(rsp,112)); - __ vinsertf128h(xmm8, Address(rsp,128)); - __ vinsertf128h(xmm9, Address(rsp,144)); - __ vinsertf128h(xmm10, Address(rsp,160)); - __ vinsertf128h(xmm11, Address(rsp,176)); - __ vinsertf128h(xmm12, Address(rsp,192)); - __ vinsertf128h(xmm13, Address(rsp,208)); - __ vinsertf128h(xmm14, Address(rsp,224)); - __ vinsertf128h(xmm15, Address(rsp,240)); - __ addptr(rsp, 256); + __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt)); + __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt)); + __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt)); + __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt)); + __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt)); + __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt)); + __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt)); + __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt)); + __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt)); + __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt)); + __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt)); + __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt)); + __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt)); + __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt)); + __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt)); + __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt)); } + __ addptr(rsp, 256); #else assert(!restore_vectors, "vectors are generated only by C2 and JVMCI"); #endif @@ -4218,3 +4230,4 @@ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); } #endif // COMPILER2 +