comparison src/cpu/x86/vm/sharedRuntime_x86_64.cpp @ 21884:525c4df9428b

Corrected ymm save location description
author Tom Rodriguez <tom.rodriguez@oracle.com>
date Tue, 09 Jun 2015 12:04:47 -0700
parents 0dfd3ea90d33
children c28cb37b2e1d
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	(21883:2d97ac4c3df5)
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	(21884:525c4df9428b)
@@ -68,13 +68,36 @@
 
 class RegisterSaver {
   // Capture info about frame layout. Layout offsets are in jint
   // units because compiler frame slots are jints.
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
+#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
+#if defined(COMPILER2) || defined(JVMCI)
+    ymm_off       = fpu_state_off,                              // offset in fxsave save area
+    DEF_YMM_HI_OFFS(0),
+    DEF_YMM_HI_OFFS(1),
+    DEF_YMM_HI_OFFS(2),
+    DEF_YMM_HI_OFFS(3),
+    DEF_YMM_HI_OFFS(4),
+    DEF_YMM_HI_OFFS(5),
+    DEF_YMM_HI_OFFS(6),
+    DEF_YMM_HI_OFFS(7),
+    DEF_YMM_HI_OFFS(8),
+    DEF_YMM_HI_OFFS(9),
+    DEF_YMM_HI_OFFS(10),
+    DEF_YMM_HI_OFFS(11),
+    DEF_YMM_HI_OFFS(12),
+    DEF_YMM_HI_OFFS(13),
+    DEF_YMM_HI_OFFS(14),
+    DEF_YMM_HI_OFFS(15),
+    ymm_hi_save_size = 16 * 16 / BytesPerInt,
+#else
+    ymm_hi_save_size = 0,
+#endif
+    xmm_off       = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size, // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
     DEF_XMM_OFFS(2),
     DEF_XMM_OFFS(3),
     DEF_XMM_OFFS(4),
@@ -87,11 +110,11 @@
     DEF_XMM_OFFS(11),
     DEF_XMM_OFFS(12),
     DEF_XMM_OFFS(13),
     DEF_XMM_OFFS(14),
     DEF_XMM_OFFS(15),
-    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
+    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size,
     fpu_stateH_end,
     r15_off, r15H_off,
     r14_off, r14H_off,
     r13_off, r13H_off,
     r12_off, r12H_off,
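
The patch replaces the ad-hoc byte-offset bookkeeping for the YMM upper-half save area with named slots in RegisterSaver's layout enum, so that area is described the same way as every other saved register. For reference, the slot arithmetic works out as follows; this is a standalone sketch, not HotSpot code, assuming BytesPerInt == 4 and (purely for illustration) a 32-byte argument register save area, which is 0 on System V targets:

    #include <cstdio>

    // Sketch of the layout arithmetic above. Offsets are in jint (4-byte)
    // slots; each DEF_YMM_HI_OFFS(n) lands 16 bytes = 4 slots past the last.
    static const int BytesPerInt             = 4;
    static const int arg_reg_save_area_bytes = 32;  // assumed for illustration

    static const int fpu_state_off = arg_reg_save_area_bytes / BytesPerInt;
    static const int ymm_off       = fpu_state_off;
    static const int ymm_hi0_off   = ymm_off +  0 * 16 / BytesPerInt;  // slot 8
    static const int ymm_hi1_off   = ymm_off +  1 * 16 / BytesPerInt;  // slot 12
    static const int ymm_hi15_off  = ymm_off + 15 * 16 / BytesPerInt;  // slot 68
    static const int ymm_hi_save_size = 16 * 16 / BytesPerInt;  // 64 slots = 256 bytes
    static const int xmm_off = fpu_state_off + 160 / BytesPerInt + ymm_hi_save_size;

    int main() {
      printf("ymm_hi1_off=%d ymm_hi15_off=%d size=%d slots xmm_off=%d\n",
             ymm_hi1_off, ymm_hi15_off, ymm_hi_save_size, xmm_off);
      return 0;
    }

Note that ymm_hi_save_size is also folded into xmm_off and fpu_state_end above, so every later offset shifts up by the 256-byte area whenever COMPILER2 or JVMCI is built.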
@@ -137,25 +160,10 @@
   // all the other values have already been extracted.
   static void restore_result_registers(MacroAssembler* masm);
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-  int vect_words = 0;
-  int ymmhi_offset = -1;
-#if defined(COMPILER2) || defined(JVMCI)
-  if (save_vectors) {
-    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    // Save upper half of YMM registes
-    vect_words = 16 * 16 / wordSize;
-    ymmhi_offset = additional_frame_words;
-    additional_frame_words += vect_words;
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
-
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
@@ -172,30 +180,36 @@
   // to be under the return like a normal enter.
 
   __ enter();          // rsp becomes 16-byte aligned here
   __ push_CPU_state(); // Push a multiple of 16 bytes
 
-  if (vect_words > 0) {
-    assert(vect_words*wordSize == 256, "");
-    __ subptr(rsp, 256); // Save upper half of YMM registes
-    __ vextractf128h(Address(rsp,  0),xmm0);
-    __ vextractf128h(Address(rsp, 16),xmm1);
-    __ vextractf128h(Address(rsp, 32),xmm2);
-    __ vextractf128h(Address(rsp, 48),xmm3);
-    __ vextractf128h(Address(rsp, 64),xmm4);
-    __ vextractf128h(Address(rsp, 80),xmm5);
-    __ vextractf128h(Address(rsp, 96),xmm6);
-    __ vextractf128h(Address(rsp,112),xmm7);
-    __ vextractf128h(Address(rsp,128),xmm8);
-    __ vextractf128h(Address(rsp,144),xmm9);
-    __ vextractf128h(Address(rsp,160),xmm10);
-    __ vextractf128h(Address(rsp,176),xmm11);
-    __ vextractf128h(Address(rsp,192),xmm12);
-    __ vextractf128h(Address(rsp,208),xmm13);
-    __ vextractf128h(Address(rsp,224),xmm14);
-    __ vextractf128h(Address(rsp,240),xmm15);
-  }
+#if defined(COMPILER2) || defined(JVMCI)
+  __ subptr(rsp, 256); // Save upper half of YMM registers
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    // Save upper half of YMM registers
+    __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0);
+    __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1);
+    __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2);
+    __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3);
+    __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4);
+    __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5);
+    __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6);
+    __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7);
+    __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8);
+    __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9);
+    __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10);
+    __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11);
+    __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12);
+    __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13);
+    __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14);
+    __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15);
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
+#endif
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }
 
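
On the save side the 256-byte subptr is now unconditional, matching the unconditional ymm_hi_save_size in the layout, while the actual stores still depend on save_vectors. Each vextractf128h stores only the upper 128-bit lane of a YMM register; the lower lane is already in the fxsave image written by push_CPU_state, and ymm_hiN_off * BytesPerInt converts the jint-slot offset back to a byte displacement. In intrinsic form the store is roughly the following sketch (not the MacroAssembler implementation):

    #include <immintrin.h>

    // Approximately what "vextractf128h(Address(rsp, off), xmmN)" emits:
    // vextractf128 with imm8 = 1, i.e. store the high 128-bit lane to memory.
    static inline void save_upper_lane(__m256 ymm, void* slot) {
      _mm_storeu_ps(static_cast<float*>(slot), _mm256_extractf128_ps(ymm, 1));
    }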
@@ -206,11 +220,10 @@
 
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(frame_size_in_slots, 0);
 
 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
-#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
 
   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
@@ -244,27 +257,26 @@
   map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
 
 
 #if defined(COMPILER2) || defined(JVMCI)
   if (save_vectors) {
-    assert(ymmhi_offset != -1, "save area must exist");
-    map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next());
   }
 #endif
 
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
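
Switching from YMMHI_STACK_OFFSET(byte offset) to STACK_OFFSET(ymm_hiN_off) lets the OopMap entries name the same layout slots as the save code, removing the parallel bookkeeping through ymmhi_offset. The four next() hops encode which piece of the register lives in the slot: assuming each VMReg names one 32-bit chunk of an XMM register, slot 4 is byte 16, the base of the upper 128-bit half. A hypothetical helper (not HotSpot API) showing the intent:

    // Hypothetical helper, using HotSpot's VMReg/XMMRegister types, naming
    // the first 32-bit slot of an XMM register's upper 128 bits.
    static VMReg upper_half(XMMRegister r) {
      VMReg v = r->as_VMReg();
      for (int i = 0; i < 4; i++) {
        v = v->next();  // skip the four 32-bit slots of the low 128 bits
      }
      return v;
    }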
@@ -312,28 +324,28 @@
 #if defined(COMPILER2) || defined(JVMCI)
   if (restore_vectors) {
     // Restore upper half of YMM registes.
     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp,  0));
-    __ vinsertf128h(xmm1, Address(rsp, 16));
-    __ vinsertf128h(xmm2, Address(rsp, 32));
-    __ vinsertf128h(xmm3, Address(rsp, 48));
-    __ vinsertf128h(xmm4, Address(rsp, 64));
-    __ vinsertf128h(xmm5, Address(rsp, 80));
-    __ vinsertf128h(xmm6, Address(rsp, 96));
-    __ vinsertf128h(xmm7, Address(rsp,112));
-    __ vinsertf128h(xmm8, Address(rsp,128));
-    __ vinsertf128h(xmm9, Address(rsp,144));
-    __ vinsertf128h(xmm10, Address(rsp,160));
-    __ vinsertf128h(xmm11, Address(rsp,176));
-    __ vinsertf128h(xmm12, Address(rsp,192));
-    __ vinsertf128h(xmm13, Address(rsp,208));
-    __ vinsertf128h(xmm14, Address(rsp,224));
-    __ vinsertf128h(xmm15, Address(rsp,240));
-    __ addptr(rsp, 256);
-  }
+    __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt));
+    __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt));
+    __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt));
+    __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt));
+    __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt));
+    __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt));
+    __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt));
+    __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt));
+    __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt));
+    __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt));
+    __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt));
+    __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt));
+    __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt));
+    __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt));
+    __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt));
+    __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt));
+  }
+  __ addptr(rsp, 256);
 #else
   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
   // Recover CPU state
   __ pop_CPU_state();
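
The restore path now mirrors the save path: addptr(rsp, 256) moves outside the if so the always-reserved area is always reclaimed, matching the unconditional subptr above, and each vinsertf128h reloads an upper lane while the lower lane comes from the fxsave image that pop_CPU_state restores just below. The intrinsic-level counterpart, again as a sketch:

    #include <immintrin.h>

    // Approximately what "vinsertf128h(xmmN, Address(rsp, off))" emits:
    // vinsertf128 with imm8 = 1, i.e. reload the high 128-bit lane while
    // keeping the current low lane.
    static inline __m256 restore_upper_lane(__m256 ymm, const void* slot) {
      return _mm256_insertf128_ps(
          ymm, _mm_loadu_ps(static_cast<const float*>(slot)), 1);
    }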
@@ -4216,5 +4228,6 @@
 
   // Set exception blob
   _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
 }
 #endif // COMPILER2
+