# HG changeset patch
# User Tom Rodriguez
# Date 1433446567 25200
# Node ID b3d5463a8362043e32285acb8120a86b4a1d514f
# Parent  c2e90b2b3fcc45966bf2a415462cb2060a73b1f8
Anti-delta register saving portion of a560c9b81f0f

diff -r c2e90b2b3fcc -r b3d5463a8362 src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Jun 04 12:50:21 2015 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Jun 04 12:36:07 2015 -0700
@@ -70,32 +70,9 @@
 // Capture info about frame layout.  Layout offsets are in jint
 // units because compiler frame slots are jints.
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
-#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt
 enum layout {
   fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-#if defined(COMPILER2) || defined(JVMCI)
-  ymm_off = fpu_state_off,            // offset in fxsave save area
-  DEF_YMM_HI_OFFS(0),
-  DEF_YMM_HI_OFFS(1),
-  DEF_YMM_HI_OFFS(2),
-  DEF_YMM_HI_OFFS(3),
-  DEF_YMM_HI_OFFS(4),
-  DEF_YMM_HI_OFFS(5),
-  DEF_YMM_HI_OFFS(6),
-  DEF_YMM_HI_OFFS(7),
-  DEF_YMM_HI_OFFS(8),
-  DEF_YMM_HI_OFFS(9),
-  DEF_YMM_HI_OFFS(10),
-  DEF_YMM_HI_OFFS(11),
-  DEF_YMM_HI_OFFS(12),
-  DEF_YMM_HI_OFFS(13),
-  DEF_YMM_HI_OFFS(14),
-  DEF_YMM_HI_OFFS(15),
-  ymm_hi_save_size = 16 * 16 / BytesPerInt,
-#else
-  ymm_hi_save_size = 0,
-#endif
-  xmm_off = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size,            // offset in fxsave save area
+  xmm_off = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
   DEF_XMM_OFFS(0),
   DEF_XMM_OFFS(1),
   DEF_XMM_OFFS(2),
@@ -112,7 +89,7 @@
   DEF_XMM_OFFS(13),
   DEF_XMM_OFFS(14),
   DEF_XMM_OFFS(15),
-  fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size,
+  fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
   fpu_stateH_end,
   r15_off, r15H_off,
   r14_off, r14H_off,
@@ -162,6 +139,19 @@
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+  int vect_words = 0;
+#ifdef COMPILER2
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    // Save upper half of YMM registes
+    vect_words = 16 * 16 / wordSize;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2");
+#endif
+
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
@@ -182,32 +172,26 @@
   __ enter();              // rsp becomes 16-byte aligned here
   __ push_CPU_state();     // Push a multiple of 16 bytes
-#if defined(COMPILER2) || defined(JVMCI)
-  __ subptr(rsp, 256); // Save upper half of YMM registers
-  if (save_vectors) {
-    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    // Save upper half of YMM registers
-    __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0);
-    __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1);
-    __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2);
-    __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3);
-    __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4);
-    __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5);
-    __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6);
-    __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7);
-    __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8);
-    __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9);
-    __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10);
-    __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11);
-    __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12);
-    __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13);
-    __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14);
-    __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15);
+  if (vect_words > 0) {
+    assert(vect_words*wordSize == 256, "");
+    __ subptr(rsp, 256); // Save upper half of YMM registes
+    __ vextractf128h(Address(rsp,  0),xmm0);
+    __ vextractf128h(Address(rsp, 16),xmm1);
+    __ vextractf128h(Address(rsp, 32),xmm2);
+    __ vextractf128h(Address(rsp, 48),xmm3);
+    __ vextractf128h(Address(rsp, 64),xmm4);
+    __ vextractf128h(Address(rsp, 80),xmm5);
+    __ vextractf128h(Address(rsp, 96),xmm6);
+    __ vextractf128h(Address(rsp,112),xmm7);
+    __ vextractf128h(Address(rsp,128),xmm8);
+    __ vextractf128h(Address(rsp,144),xmm9);
+    __ vextractf128h(Address(rsp,160),xmm10);
+    __ vextractf128h(Address(rsp,176),xmm11);
+    __ vextractf128h(Address(rsp,192),xmm12);
+    __ vextractf128h(Address(rsp,208),xmm13);
+    __ vextractf128h(Address(rsp,224),xmm14);
+    __ vextractf128h(Address(rsp,240),xmm15);
   }
-#else
-  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
 
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
@@ -256,28 +240,6 @@
   map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
   map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
 
-
-#if defined(COMPILER2) || defined(JVMCI)
-  if (save_vectors) {
-    map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next());
-  }
-#endif
-
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
@@ -321,31 +283,31 @@
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
-#if defined(COMPILER2) || defined(JVMCI)
+#ifdef COMPILER2
   if (restore_vectors) {
     // Restore upper half of YMM registes.
     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt));
-    __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt));
-    __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt));
-    __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt));
-    __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt));
-    __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt));
-    __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt));
-    __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt));
-    __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt));
-    __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt));
-    __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt));
-    __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt));
-    __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt));
-    __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt));
-    __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt));
-    __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt));
+    __ vinsertf128h(xmm0, Address(rsp,  0));
+    __ vinsertf128h(xmm1, Address(rsp, 16));
+    __ vinsertf128h(xmm2, Address(rsp, 32));
+    __ vinsertf128h(xmm3, Address(rsp, 48));
+    __ vinsertf128h(xmm4, Address(rsp, 64));
+    __ vinsertf128h(xmm5, Address(rsp, 80));
+    __ vinsertf128h(xmm6, Address(rsp, 96));
+    __ vinsertf128h(xmm7, Address(rsp,112));
+    __ vinsertf128h(xmm8, Address(rsp,128));
+    __ vinsertf128h(xmm9, Address(rsp,144));
+    __ vinsertf128h(xmm10, Address(rsp,160));
+    __ vinsertf128h(xmm11, Address(rsp,176));
+    __ vinsertf128h(xmm12, Address(rsp,192));
+    __ vinsertf128h(xmm13, Address(rsp,208));
+    __ vinsertf128h(xmm14, Address(rsp,224));
+    __ vinsertf128h(xmm15, Address(rsp,240));
+    __ addptr(rsp, 256);
   }
-  __ addptr(rsp, 256);
 #else
-  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
+  assert(!restore_vectors, "vectors are generated only by C2");
 #endif
   // Recover CPU state
   __ pop_CPU_state();
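
Note on the arithmetic above: the deleted enum reserved ymm_hi_save_size = 16 * 16 / BytesPerInt = 64 jint slots for the upper halves of the sixteen YMM registers, while the restored code sizes the same 256-byte area in machine words (vect_words = 16 * 16 / wordSize = 32 on LP64). Each VMReg stack slot covers 32 bits, which is why the deleted OopMap entries reached a register's upper half with four ->next() hops (4 x 4 bytes = 16 bytes into the register). The following standalone C++ sketch re-derives these constants; it assumes the LP64 values BytesPerInt == 4 and wordSize == 8, which are not spelled out in the patch itself.

// sanity_check.cpp -- illustrative only, not part of the patch.
// Re-derives the save-area layout constants under assumed LP64 values.
constexpr int BytesPerInt = 4;   // sizeof(jint)
constexpr int wordSize    = 8;   // 64-bit machine word

// Upper halves of 16 YMM registers, 16 bytes (128 bits) each.
constexpr int ymm_hi_save_bytes = 16 * 16;                          // 256 bytes
constexpr int ymm_hi_save_size  = ymm_hi_save_bytes / BytesPerInt;  // 64 jint slots (old enum)
constexpr int vect_words        = ymm_hi_save_bytes / wordSize;     // 32 words (new code)

// Mirrors the runtime check assert(vect_words*wordSize == 256, "").
static_assert(vect_words * wordSize == 256, "256-byte YMM upper-half area");

// Each register's slot sits regnum*16 bytes above rsp, e.g. Address(rsp,112) for xmm7.
constexpr int slot_offset(int regnum) { return regnum * 16; }
static_assert(slot_offset(7)  == 112, "xmm7 upper half at rsp+112");
static_assert(slot_offset(15) == 240, "xmm15 upper half at rsp+240");

int main() { return 0; }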
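
The vextractf128h / vinsertf128h MacroAssembler helpers used above emit VEXTRACTF128 / VINSERTF128 with immediate 1, so each pair moves only the upper 128 bits of one YMM register; the lower 128 bits travel through the fxsave image written by push_CPU_state. An illustrative sketch of one such pair using C++ intrinsics follows (this is not HotSpot code; the 16-byte buffer stands in for a stack slot such as Address(rsp, 16), and the zeroing step merely simulates a callee clobbering the upper half).

// ymm_spill_demo.cpp -- illustrative only; build with e.g. -mavx.
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  alignas(16) uint8_t slot[16];            // one 16-byte save slot
  __m256i ymm = _mm256_set1_epi8(0x5A);    // a live 256-bit value

  // save path: vextractf128h(Address(rsp, N), xmmK)
  // == VEXTRACTF128 mem128, ymmK, 1 -- spill the upper 128 bits.
  _mm_store_si128((__m128i*)slot, _mm256_extractf128_si256(ymm, 1));

  // Pretend a callee clobbered the upper half in the meantime.
  ymm = _mm256_insertf128_si256(ymm, _mm_setzero_si128(), 1);

  // restore path: vinsertf128h(xmmK, Address(rsp, N))
  // == VINSERTF128 ymmK, ymmK, mem128, 1 -- reload the upper 128 bits.
  ymm = _mm256_insertf128_si256(ymm, _mm_load_si128((const __m128i*)slot), 1);

  alignas(32) uint8_t out[32];
  _mm256_store_si256((__m256i*)out, ymm);
  printf("byte 31 after restore: 0x%02X\n", out[31]);  // prints 0x5A
  return 0;
}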