comparison src/cpu/x86/vm/sharedRuntime_x86_64.cpp @ 21761:0dfd3ea90d33

Undo changes around saving of ymm registers.
author Thomas Wuerthinger <thomas.wuerthinger@oracle.com>
date Sat, 06 Jun 2015 15:12:58 +0200
parents c85c89f6b2d1
children 525c4df9428b
comparing 21759:75daca0c6a0f (old) with 21761:0dfd3ea90d33 (new)
@@ -68,13 +68,36 @@
 
 class RegisterSaver {
   // Capture info about frame layout. Layout offsets are in jint
   // units because compiler frame slots are jints.
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-    xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area
+#if defined(COMPILER2) || defined(JVMCI)
+    ymm_off = fpu_state_off, // offset in fxsave save area
+    DEF_YMM_HI_OFFS(0),
+    DEF_YMM_HI_OFFS(1),
+    DEF_YMM_HI_OFFS(2),
+    DEF_YMM_HI_OFFS(3),
+    DEF_YMM_HI_OFFS(4),
+    DEF_YMM_HI_OFFS(5),
+    DEF_YMM_HI_OFFS(6),
+    DEF_YMM_HI_OFFS(7),
+    DEF_YMM_HI_OFFS(8),
+    DEF_YMM_HI_OFFS(9),
+    DEF_YMM_HI_OFFS(10),
+    DEF_YMM_HI_OFFS(11),
+    DEF_YMM_HI_OFFS(12),
+    DEF_YMM_HI_OFFS(13),
+    DEF_YMM_HI_OFFS(14),
+    DEF_YMM_HI_OFFS(15),
+    ymm_hi_save_size = 16 * 16 / BytesPerInt,
+#else
+    ymm_hi_save_size = 0,
+#endif
+    xmm_off = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size, // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
     DEF_XMM_OFFS(2),
     DEF_XMM_OFFS(3),
     DEF_XMM_OFFS(4),
@@ -87,11 +110,11 @@
     DEF_XMM_OFFS(11),
     DEF_XMM_OFFS(12),
     DEF_XMM_OFFS(13),
     DEF_XMM_OFFS(14),
     DEF_XMM_OFFS(15),
-    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
+    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size,
     fpu_stateH_end,
     r15_off, r15H_off,
     r14_off, r14H_off,
     r13_off, r13H_off,
     r12_off, r12H_off,
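Note on the new layout (editor's aside, not part of the patch): DEF_YMM_HI_OFFS is a token-pasting macro in the same style as the existing DEF_XMM_OFFS. A minimal sketch of what it expands to, with a made-up starting slot; BytesPerInt is 4, so each 16-byte YMM upper half occupies four jint slots:

    // Illustration only; the _s names and the starting slot are hypothetical.
    enum layout_sketch {
      ymm_off_s          = 8,                   // suppose the save area starts at slot 8
      ymm_hi0_off_s      = ymm_off_s + 0*16/4,  // DEF_YMM_HI_OFFS(0)  -> slot 8
      ymm_hi1_off_s      = ymm_off_s + 1*16/4,  // DEF_YMM_HI_OFFS(1)  -> slot 12
      ymm_hi15_off_s     = ymm_off_s + 15*16/4, // DEF_YMM_HI_OFFS(15) -> slot 68
      ymm_hi_save_size_s = 16*16/4              // 64 jint slots = 256 bytes total
    };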
@@ -137,25 +160,10 @@
   // all the other values have already been extracted.
   static void restore_result_registers(MacroAssembler* masm);
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-  int vect_words = 0;
-  int ymmhi_offset = -1;
-#if defined(COMPILER2) || defined(JVMCI)
-  if (save_vectors) {
-    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    // Save upper half of YMM registers
-    vect_words = 16 * 16 / wordSize;
-    ymmhi_offset = additional_frame_words;
-    additional_frame_words += vect_words;
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
-
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
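Editor's aside: the removed bookkeeping grew the frame through additional_frame_words, while the new enum folds the same 256 bytes into reg_save_size via ymm_hi_save_size. A quick consistency check in plain C++ with the x86_64 constants:

    // wordSize and BytesPerInt are the HotSpot values for x86_64.
    constexpr int wordSize         = 8;
    constexpr int BytesPerInt      = 4;
    constexpr int vect_words       = 16 * 16 / wordSize;     // old scheme: 32 words
    constexpr int ymm_hi_save_size = 16 * 16 / BytesPerInt;  // new scheme: 64 jint slots
    static_assert(vect_words * wordSize == 256,       "old scheme reserved 256 bytes");
    static_assert(ymm_hi_save_size * BytesPerInt == 256, "new scheme reserves the same 256 bytes");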
@@ -172,30 +180,36 @@
   // to be under the return like a normal enter.
 
   __ enter();          // rsp becomes 16-byte aligned here
   __ push_CPU_state(); // Push a multiple of 16 bytes
 
-  if (vect_words > 0) {
-    assert(vect_words*wordSize == 256, "");
-    __ subptr(rsp, 256); // Save upper half of YMM registers
-    __ vextractf128h(Address(rsp, 0),xmm0);
-    __ vextractf128h(Address(rsp, 16),xmm1);
-    __ vextractf128h(Address(rsp, 32),xmm2);
-    __ vextractf128h(Address(rsp, 48),xmm3);
-    __ vextractf128h(Address(rsp, 64),xmm4);
-    __ vextractf128h(Address(rsp, 80),xmm5);
-    __ vextractf128h(Address(rsp, 96),xmm6);
-    __ vextractf128h(Address(rsp,112),xmm7);
-    __ vextractf128h(Address(rsp,128),xmm8);
-    __ vextractf128h(Address(rsp,144),xmm9);
-    __ vextractf128h(Address(rsp,160),xmm10);
-    __ vextractf128h(Address(rsp,176),xmm11);
-    __ vextractf128h(Address(rsp,192),xmm12);
-    __ vextractf128h(Address(rsp,208),xmm13);
-    __ vextractf128h(Address(rsp,224),xmm14);
-    __ vextractf128h(Address(rsp,240),xmm15);
-  }
+#if defined(COMPILER2) || defined(JVMCI)
+  __ subptr(rsp, 256); // Save upper half of YMM registers
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    // Save upper half of YMM registers
+    __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0);
+    __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1);
+    __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2);
+    __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3);
+    __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4);
+    __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5);
+    __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6);
+    __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7);
+    __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8);
+    __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9);
+    __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10);
+    __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11);
+    __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12);
+    __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13);
+    __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14);
+    __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15);
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
+#endif
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
   }
 
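For readers unfamiliar with the helpers: vextractf128h stores the upper 128 bits of a YMM register and vinsertf128h reloads them (the MacroAssembler wraps VEXTRACTF128/VINSERTF128 selecting the upper lane). A standalone sketch of the same operation written with compiler intrinsics, illustration only and not HotSpot code:

    #include <immintrin.h>

    // Save bits 255:128 of a YMM value to a 16-byte slot.
    void save_upper_half(__m256 ymm, float* slot) {
      __m128 hi = _mm256_extractf128_ps(ymm, 1);  // extract upper 128 bits
      _mm_storeu_ps(slot, hi);
    }

    // Restore bits 255:128, leaving the lower half untouched.
    __m256 restore_upper_half(__m256 ymm, const float* slot) {
      __m128 hi = _mm_loadu_ps(slot);
      return _mm256_insertf128_ps(ymm, hi, 1);
    }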
@@ -206,11 +220,10 @@
 
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(frame_size_in_slots, 0);
 
 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
-#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x) + ymmhi_offset)
 
   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
@@ -244,27 +257,26 @@
   map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
 
 
 #if defined(COMPILER2) || defined(JVMCI)
   if (save_vectors) {
-    assert(ymmhi_offset != -1, "save area must exist");
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm10_off), xmm10->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm11_off), xmm11->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm12_off), xmm12->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm13_off), xmm13->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm14_off), xmm14->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(xmm15_off), xmm15->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next());
   }
 #endif
 
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
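The four chained next() calls deserve a note: in HotSpot's register model a VMReg names a single 32-bit slot and next() advances one slot, so four of them step past the XMM (lower 128-bit) portion and land on the first slot of the YMM upper half that the vextractf128h stores placed on the stack. Illustration only:

    // Each next() moves one 32-bit slot up from the base of xmm0:
    // base = bits 31:0, +1 = 63:32, +2 = 95:64, +3 = 127:96, +4 = 159:128.
    VMReg ymm0_hi = xmm0->as_VMReg()->next()->next()->next()->next();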
@@ -312,28 +324,28 @@
 #if defined(COMPILER2) || defined(JVMCI)
   if (restore_vectors) {
     // Restore upper half of YMM registers.
     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp, 0));
-    __ vinsertf128h(xmm1, Address(rsp, 16));
-    __ vinsertf128h(xmm2, Address(rsp, 32));
-    __ vinsertf128h(xmm3, Address(rsp, 48));
-    __ vinsertf128h(xmm4, Address(rsp, 64));
-    __ vinsertf128h(xmm5, Address(rsp, 80));
-    __ vinsertf128h(xmm6, Address(rsp, 96));
-    __ vinsertf128h(xmm7, Address(rsp,112));
-    __ vinsertf128h(xmm8, Address(rsp,128));
-    __ vinsertf128h(xmm9, Address(rsp,144));
-    __ vinsertf128h(xmm10, Address(rsp,160));
-    __ vinsertf128h(xmm11, Address(rsp,176));
-    __ vinsertf128h(xmm12, Address(rsp,192));
-    __ vinsertf128h(xmm13, Address(rsp,208));
-    __ vinsertf128h(xmm14, Address(rsp,224));
-    __ vinsertf128h(xmm15, Address(rsp,240));
-    __ addptr(rsp, 256);
-  }
+    __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt));
+    __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt));
+    __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt));
+    __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt));
+    __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt));
+    __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt));
+    __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt));
+    __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt));
+    __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt));
+    __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt));
+    __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt));
+    __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt));
+    __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt));
+    __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt));
+    __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt));
+    __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt));
+  }
+  __ addptr(rsp, 256);
 #else
   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
   // Recover CPU state
   __ pop_CPU_state();
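This hunk also moves the addptr outside the restore_vectors check, mirroring the unconditional subptr on the save path, so entry and exit agree on the frame shape whether or not vectors were saved. Worked check (editor's sketch, assuming frame::arg_reg_save_area_bytes == 0 as on System V x86_64, which makes fpu_state_off and hence ymm_off zero): the symbolic offsets reproduce the hard-coded byte offsets the patch removes.

    constexpr int BytesPerInt = 4;
    constexpr int ymm_off = 0;  // fpu_state_off, assuming no arg register save area
    constexpr int ymm_hi_off(int n) { return ymm_off + n*16/BytesPerInt; }
    static_assert(ymm_hi_off(0)  * BytesPerInt ==   0, "was Address(rsp,  0)");
    static_assert(ymm_hi_off(7)  * BytesPerInt == 112, "was Address(rsp,112)");
    static_assert(ymm_hi_off(15) * BytesPerInt == 240, "was Address(rsp,240)");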
@@ -4216,5 +4228,6 @@
 
   // Set exception blob
   _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
 }
 #endif // COMPILER2
+