comparison src/cpu/x86/vm/sharedRuntime_x86_64.cpp @ 21736:b3d5463a8362

Anti-delta register saving portion of a560c9b81f0f
author Tom Rodriguez <tom.rodriguez@oracle.com>
date Thu, 04 Jun 2015 12:36:07 -0700
parents be896a1983c0
children c85c89f6b2d1
comparing 21734:c2e90b2b3fcc with 21736:b3d5463a8362
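
This changeset backs out the RegisterSaver half of a560c9b81f0f: the dedicated ymm_hi*_off slots disappear from the fixed frame layout, and the upper 128 bits of the YMM registers go back to being saved in 256 bytes of scratch space taken off rsp only when vectors are live. A minimal sketch of the space arithmetic both schemes share, assuming the usual x86_64 HotSpot constants BytesPerInt == 4 and wordSize == 8:

    const int ymm_upper_bytes  = 16 * 16;             // 16 registers * 16-byte upper halves = 256 bytes
    const int ymm_hi_save_size = ymm_upper_bytes / 4; // 64 jint slots (the deleted layout entries)
    const int vect_words       = ymm_upper_bytes / 8; // 32 words (the restored save_live_registers)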
@@ -68,36 +68,13 @@
 
 class RegisterSaver {
   // Capture info about frame layout. Layout offsets are in jint
   // units because compiler frame slots are jints.
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
-#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-#if defined(COMPILER2) || defined(JVMCI)
-    ymm_off = fpu_state_off, // offset in fxsave save area
-    DEF_YMM_HI_OFFS(0),
-    DEF_YMM_HI_OFFS(1),
-    DEF_YMM_HI_OFFS(2),
-    DEF_YMM_HI_OFFS(3),
-    DEF_YMM_HI_OFFS(4),
-    DEF_YMM_HI_OFFS(5),
-    DEF_YMM_HI_OFFS(6),
-    DEF_YMM_HI_OFFS(7),
-    DEF_YMM_HI_OFFS(8),
-    DEF_YMM_HI_OFFS(9),
-    DEF_YMM_HI_OFFS(10),
-    DEF_YMM_HI_OFFS(11),
-    DEF_YMM_HI_OFFS(12),
-    DEF_YMM_HI_OFFS(13),
-    DEF_YMM_HI_OFFS(14),
-    DEF_YMM_HI_OFFS(15),
-    ymm_hi_save_size = 16 * 16 / BytesPerInt,
-#else
-    ymm_hi_save_size = 0,
-#endif
-    xmm_off = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size, // offset in fxsave save area
+    xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
     DEF_XMM_OFFS(2),
     DEF_XMM_OFFS(3),
     DEF_XMM_OFFS(4),
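
Both offset macros name jint slots at 16-byte strides; a sketch of what the preprocessor produces for regnum == 1, with DEF_YMM_HI_OFFS taken from the deleted side:

    xmm1_off = xmm_off + 1*16/BytesPerInt,   // == xmm_off + 4: each XMM image is 16 bytes = 4 jint slots
    xmm1H_off,                               // next enum value: the second slot (bits 32..63)
    ymm_hi1_off = ymm_off + 1*16/BytesPerInt // == ymm_off + 4: one name per 16-byte upper half

Only the slots the oop map actually refers to get names; the rest of each 16-byte fxsave slot stays anonymous.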
@@ -110,11 +87,11 @@
     DEF_XMM_OFFS(11),
     DEF_XMM_OFFS(12),
     DEF_XMM_OFFS(13),
     DEF_XMM_OFFS(14),
     DEF_XMM_OFFS(15),
-    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size,
+    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
     fpu_stateH_end,
     r15_off, r15H_off,
     r14_off, r14H_off,
     r13_off, r13H_off,
     r12_off, r12H_off,
@@ -160,10 +137,23 @@
   // all the other values have already been extracted.
   static void restore_result_registers(MacroAssembler* masm);
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+  int vect_words = 0;
+#ifdef COMPILER2
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    // Save upper half of YMM registers
+    vect_words = 16 * 16 / wordSize;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2");
+#endif
+
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
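
Either way the frame grows by the same 256 bytes; only the bookkeeping differs. A comparison sketch:

    // deleted scheme:  the area lives inside the fixed layout;
    //                  reg_save_size already includes ymm_hi_save_size (64 jint slots)
    // restored scheme: the area rides in additional_frame_words, which grows
    //                  by vect_words (32 words) before the round_to(..., 16)
    //                  alignment of frame_size_in_bytes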
@@ -180,36 +170,30 @@
   // to be under the return like a normal enter.
 
   __ enter();          // rsp becomes 16-byte aligned here
   __ push_CPU_state(); // Push a multiple of 16 bytes
 
-#if defined(COMPILER2) || defined(JVMCI)
-  __ subptr(rsp, 256); // Save upper half of YMM registers
-  if (save_vectors) {
-    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    // Save upper half of YMM registers
-    __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0);
-    __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1);
-    __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2);
-    __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3);
-    __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4);
-    __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5);
-    __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6);
-    __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7);
-    __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8);
-    __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9);
-    __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10);
-    __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11);
-    __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12);
-    __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13);
-    __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14);
-    __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15);
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
+  if (vect_words > 0) {
+    assert(vect_words*wordSize == 256, "");
+    __ subptr(rsp, 256); // Save upper half of YMM registers
+    __ vextractf128h(Address(rsp,   0), xmm0);
+    __ vextractf128h(Address(rsp,  16), xmm1);
+    __ vextractf128h(Address(rsp,  32), xmm2);
+    __ vextractf128h(Address(rsp,  48), xmm3);
+    __ vextractf128h(Address(rsp,  64), xmm4);
+    __ vextractf128h(Address(rsp,  80), xmm5);
+    __ vextractf128h(Address(rsp,  96), xmm6);
+    __ vextractf128h(Address(rsp, 112), xmm7);
+    __ vextractf128h(Address(rsp, 128), xmm8);
+    __ vextractf128h(Address(rsp, 144), xmm9);
+    __ vextractf128h(Address(rsp, 160), xmm10);
+    __ vextractf128h(Address(rsp, 176), xmm11);
+    __ vextractf128h(Address(rsp, 192), xmm12);
+    __ vextractf128h(Address(rsp, 208), xmm13);
+    __ vextractf128h(Address(rsp, 224), xmm14);
+    __ vextractf128h(Address(rsp, 240), xmm15);
+  }
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
   }
 
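
The restored store sequence walks the freshly reserved scratch area in 16-byte strides. A hypothetical compaction of the unrolled code, assuming as_XMMRegister(i) maps encodings 0..15 to xmm0..xmm15 as in register_x86.hpp (the real code stays unrolled):

    for (int i = 0; i < 16; i++) {
      // vextractf128h emits vextractf128 [mem], ymmN, 1: store the
      // upper 128 bits of register i at rsp + 16*i
      __ vextractf128h(Address(rsp, i * 16), as_XMMRegister(i));
    }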
@@ -253,32 +237,10 @@
   map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
   map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
   map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
   map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
   map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
-
-
-#if defined(COMPILER2) || defined(JVMCI)
-  if (save_vectors) {
-    map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next());
-  }
-#endif
 
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
     map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
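
The deleted oop-map entries lean on VMReg arithmetic: each VMReg names one 32-bit slot, so four next() calls step over the 16-byte XMM low half and land on the first slot of the YMM upper 128 bits. A hypothetical helper making that explicit:

    static VMReg ymm_hi_vmreg(XMMRegister r) {
      VMReg v = r->as_VMReg();
      for (int i = 0; i < 4; i++) v = v->next(); // skip 4 slots = 16 bytes
      return v; // first slot of the upper half, e.g. what ymm_hi0_off described for xmm0
    }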
@@ -319,35 +281,35 @@
 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
   if (frame::arg_reg_save_area_bytes != 0) {
     // Pop arg register save area
     __ addptr(rsp, frame::arg_reg_save_area_bytes);
   }
-#if defined(COMPILER2) || defined(JVMCI)
+#ifdef COMPILER2
   if (restore_vectors) {
     // Restore upper half of YMM registers.
     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt));
-    __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt));
-    __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt));
-    __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt));
-    __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt));
-    __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt));
-    __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt));
-    __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt));
-    __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt));
-    __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt));
-    __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt));
-    __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt));
-    __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt));
-    __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt));
-    __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt));
-    __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt));
-  }
-  __ addptr(rsp, 256);
+    __ vinsertf128h(xmm0,  Address(rsp,   0));
+    __ vinsertf128h(xmm1,  Address(rsp,  16));
+    __ vinsertf128h(xmm2,  Address(rsp,  32));
+    __ vinsertf128h(xmm3,  Address(rsp,  48));
+    __ vinsertf128h(xmm4,  Address(rsp,  64));
+    __ vinsertf128h(xmm5,  Address(rsp,  80));
+    __ vinsertf128h(xmm6,  Address(rsp,  96));
+    __ vinsertf128h(xmm7,  Address(rsp, 112));
+    __ vinsertf128h(xmm8,  Address(rsp, 128));
+    __ vinsertf128h(xmm9,  Address(rsp, 144));
+    __ vinsertf128h(xmm10, Address(rsp, 160));
+    __ vinsertf128h(xmm11, Address(rsp, 176));
+    __ vinsertf128h(xmm12, Address(rsp, 192));
+    __ vinsertf128h(xmm13, Address(rsp, 208));
+    __ vinsertf128h(xmm14, Address(rsp, 224));
+    __ vinsertf128h(xmm15, Address(rsp, 240));
+    __ addptr(rsp, 256);
+  }
 #else
-  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
+  assert(!restore_vectors, "vectors are generated only by C2");
 #endif
   // Recover CPU state
   __ pop_CPU_state();
   // Get the rbp described implicitly by the calling convention (no oopMap)
   __ pop(rbp);
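
The restore side mirrors the save: vinsertf128h(xmmN, mem) emits vinsertf128 ymmN, ymmN, [mem], 1, reloading only the upper 128 bits, and the low halves come back afterwards via pop_CPU_state's fxrstor, which as legacy SSE state handling leaves the upper YMM bits untouched. Note the rsp bookkeeping also changes: the deleted version reserved and released the 256 bytes unconditionally whenever COMPILER2 or JVMCI was defined, while the restored version moves the addptr(rsp, 256) inside the if, touching rsp only when vectors were actually saved. A mirrored sketch of the restore loop, under the same as_XMMRegister assumption as above:

    for (int i = 0; i < 16; i++) {
      __ vinsertf128h(as_XMMRegister(i), Address(rsp, i * 16)); // reload upper 128 bits
    }
    __ addptr(rsp, 256); // release the scratch area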