graal-jvmci-8: comparison of src/cpu/x86/vm/sharedRuntime_x86_64.cpp @ 21884:525c4df9428b

Corrected ymm save location description

author:   Tom Rodriguez <tom.rodriguez@oracle.com>
date:     Tue, 09 Jun 2015 12:04:47 -0700
parents:  0dfd3ea90d33
children: c28cb37b2e1d
--- src/cpu/x86/vm/sharedRuntime_x86_64.cpp    21883:2d97ac4c3df5
+++ src/cpu/x86/vm/sharedRuntime_x86_64.cpp    21884:525c4df9428b
@@ -68,36 +68,13 @@
 
 class RegisterSaver {
   // Capture info about frame layout. Layout offsets are in jint
   // units because compiler frame slots are jints.
 #define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
-#define DEF_YMM_HI_OFFS(regnum) ymm_hi ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt
   enum layout {
     fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
-#if defined(COMPILER2) || defined(JVMCI)
-    ymm_off = fpu_state_off, // offset in fxsave save area
-    DEF_YMM_HI_OFFS(0),
-    DEF_YMM_HI_OFFS(1),
-    DEF_YMM_HI_OFFS(2),
-    DEF_YMM_HI_OFFS(3),
-    DEF_YMM_HI_OFFS(4),
-    DEF_YMM_HI_OFFS(5),
-    DEF_YMM_HI_OFFS(6),
-    DEF_YMM_HI_OFFS(7),
-    DEF_YMM_HI_OFFS(8),
-    DEF_YMM_HI_OFFS(9),
-    DEF_YMM_HI_OFFS(10),
-    DEF_YMM_HI_OFFS(11),
-    DEF_YMM_HI_OFFS(12),
-    DEF_YMM_HI_OFFS(13),
-    DEF_YMM_HI_OFFS(14),
-    DEF_YMM_HI_OFFS(15),
-    ymm_hi_save_size = 16 * 16 / BytesPerInt,
-#else
-    ymm_hi_save_size = 0,
-#endif
-    xmm_off = fpu_state_off + 160/BytesPerInt + ymm_hi_save_size, // offset in fxsave save area
+    xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area
     DEF_XMM_OFFS(0),
     DEF_XMM_OFFS(1),
     DEF_XMM_OFFS(2),
     DEF_XMM_OFFS(3),
     DEF_XMM_OFFS(4),
@@ -110,11 +87,11 @@
     DEF_XMM_OFFS(11),
     DEF_XMM_OFFS(12),
     DEF_XMM_OFFS(13),
     DEF_XMM_OFFS(14),
     DEF_XMM_OFFS(15),
-    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt) + ymm_hi_save_size,
+    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
     fpu_stateH_end,
     r15_off, r15H_off,
     r14_off, r14H_off,
     r13_off, r13H_off,
     r12_off, r12H_off,
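The layout change above removes the enum-carved YMM area: xmm_off returns to its plain fxsave position, and fpu_state_end no longer folds in ymm_hi_save_size. As a hedged arithmetic check of where xmm0 now sits (the constants mirror HotSpot's usual values and are not quoted from this file):

    // Sketch only; BytesPerInt == 4 and arg_reg_save_area_bytes == 0
    // (non-Windows x86_64) are assumptions, not quotes from this file.
    constexpr int BytesPerInt = 4;
    constexpr int arg_reg_save_area_bytes = 0;
    constexpr int fpu_state_off = arg_reg_save_area_bytes / BytesPerInt;
    constexpr int xmm_off = fpu_state_off + 160 / BytesPerInt; // 40 jint slots
    static_assert(xmm_off == 40, "fxsave stores xmm0 at byte offset 160");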
@@ -160,10 +137,25 @@
   // all the other values have already been extracted.
   static void restore_result_registers(MacroAssembler* masm);
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+  int vect_words = 0;
+  int ymmhi_offset = -1;
+#if defined(COMPILER2) || defined(JVMCI)
+  if (save_vectors) {
+    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
+    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
+    // Save upper half of YMM registes
+    vect_words = 16 * 16 / wordSize;
+    ymmhi_offset = additional_frame_words;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
+#endif
+
   // Always make the frame size 16-byte aligned
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
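save_live_registers now sizes the save area up front: sixteen registers times sixteen upper-half bytes, converted to words, with ymmhi_offset remembering where the area starts among the frame's additional words. A quick sketch of the arithmetic (wordSize == 8 on x86_64 is an assumption, not quoted):

    constexpr int wordSize = 8;                    // 64-bit machine word (assumption)
    constexpr int vect_words = 16 * 16 / wordSize; // 32 words
    static_assert(vect_words * wordSize == 256,
                  "matches the assert and the subptr(rsp, 256) in the next hunk");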
@@ -180,36 +172,30 @@
   // to be under the return like a normal enter.
 
   __ enter();          // rsp becomes 16-byte aligned here
   __ push_CPU_state(); // Push a multiple of 16 bytes
 
-#if defined(COMPILER2) || defined(JVMCI)
-  __ subptr(rsp, 256); // Save upper half of YMM registers
-  if (save_vectors) {
-    assert(UseAVX > 0, "256bit vectors are supported only with AVX");
-    assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    // Save upper half of YMM registers
-    __ vextractf128h(Address(rsp, ymm_hi0_off * BytesPerInt), xmm0);
-    __ vextractf128h(Address(rsp, ymm_hi1_off * BytesPerInt), xmm1);
-    __ vextractf128h(Address(rsp, ymm_hi2_off * BytesPerInt), xmm2);
-    __ vextractf128h(Address(rsp, ymm_hi3_off * BytesPerInt), xmm3);
-    __ vextractf128h(Address(rsp, ymm_hi4_off * BytesPerInt), xmm4);
-    __ vextractf128h(Address(rsp, ymm_hi5_off * BytesPerInt), xmm5);
-    __ vextractf128h(Address(rsp, ymm_hi6_off * BytesPerInt), xmm6);
-    __ vextractf128h(Address(rsp, ymm_hi7_off * BytesPerInt), xmm7);
-    __ vextractf128h(Address(rsp, ymm_hi8_off * BytesPerInt), xmm8);
-    __ vextractf128h(Address(rsp, ymm_hi9_off * BytesPerInt), xmm9);
-    __ vextractf128h(Address(rsp, ymm_hi10_off * BytesPerInt), xmm10);
-    __ vextractf128h(Address(rsp, ymm_hi11_off * BytesPerInt), xmm11);
-    __ vextractf128h(Address(rsp, ymm_hi12_off * BytesPerInt), xmm12);
-    __ vextractf128h(Address(rsp, ymm_hi13_off * BytesPerInt), xmm13);
-    __ vextractf128h(Address(rsp, ymm_hi14_off * BytesPerInt), xmm14);
-    __ vextractf128h(Address(rsp, ymm_hi15_off * BytesPerInt), xmm15);
-  }
-#else
-  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
-#endif
+  if (vect_words > 0) {
+    assert(vect_words*wordSize == 256, "");
+    __ subptr(rsp, 256); // Save upper half of YMM registes
+    __ vextractf128h(Address(rsp,  0), xmm0);
+    __ vextractf128h(Address(rsp, 16), xmm1);
+    __ vextractf128h(Address(rsp, 32), xmm2);
+    __ vextractf128h(Address(rsp, 48), xmm3);
+    __ vextractf128h(Address(rsp, 64), xmm4);
+    __ vextractf128h(Address(rsp, 80), xmm5);
+    __ vextractf128h(Address(rsp, 96), xmm6);
+    __ vextractf128h(Address(rsp,112), xmm7);
+    __ vextractf128h(Address(rsp,128), xmm8);
+    __ vextractf128h(Address(rsp,144), xmm9);
+    __ vextractf128h(Address(rsp,160), xmm10);
+    __ vextractf128h(Address(rsp,176), xmm11);
+    __ vextractf128h(Address(rsp,192), xmm12);
+    __ vextractf128h(Address(rsp,208), xmm13);
+    __ vextractf128h(Address(rsp,224), xmm14);
+    __ vextractf128h(Address(rsp,240), xmm15);
+  }
   if (frame::arg_reg_save_area_bytes != 0) {
     // Allocate argument register save area
     __ subptr(rsp, frame::arg_reg_save_area_bytes);
   }
 
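With the save area reserved by an explicit subptr right after push_CPU_state, each store lands at a literal offset: xmmN's upper 128 bits go to rsp + N*16, and the assert ties vect_words to the 256 bytes being reserved. The unrolled sequence behaves like this loop sketch (not the committed code; it assumes HotSpot context, i.e. a MacroAssembler* masm behind "#define __ masm->" and as_XMMRegister(int) from register_x86.hpp):

    // Loop-form sketch of the unrolled saves above.
    for (int i = 0; i < 16; i++) {
      __ vextractf128h(Address(rsp, i * 16), as_XMMRegister(i));
    }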
@@ -220,10 +206,11 @@
 
   OopMapSet *oop_maps = new OopMapSet();
   OopMap* map = new OopMap(frame_size_in_slots, 0);
 
 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
+#define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
 
   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
@@ -257,26 +244,27 @@
   map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
 
 
 #if defined(COMPILER2) || defined(JVMCI)
   if (save_vectors) {
-    map->set_callee_saved(STACK_OFFSET(ymm_hi0_off ), xmm0->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi1_off ), xmm1->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi2_off ), xmm2->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi3_off ), xmm3->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi4_off ), xmm4->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi5_off ), xmm5->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi6_off ), xmm6->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi7_off ), xmm7->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi8_off ), xmm8->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi9_off ), xmm9->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi10_off), xmm10->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi11_off), xmm11->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi12_off), xmm12->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi13_off), xmm13->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi14_off), xmm14->as_VMReg()->next()->next()->next()->next());
-    map->set_callee_saved(STACK_OFFSET(ymm_hi15_off), xmm15->as_VMReg()->next()->next()->next()->next());
+    assert(ymmhi_offset != -1, "save area must exist");
+    map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next()->next()->next()->next());
+    map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next()->next()->next()->next());
   }
 #endif
 
   // %%% These should all be a waste but we'll keep things as they were for now
   if (true) {
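This hunk is the "description" the commit title refers to: the OopMap previously named the upper halves with the fixed enum offsets, which no longer matched where the values were actually stored; it now derives each slot from the runtime ymmhi_offset. YMMHI_STACK_OFFSET turns a byte offset inside the save area into a jint stack slot, and the four chained next() calls step the XMM register's VMReg past its 16-byte low half so the map points at the upper half. A standalone sketch of the slot arithmetic (stack_slot_size mirrors VMRegImpl::stack_slot_size; ymmhi_offset == 0 is an assumed example value):

    #include <cassert>

    int main() {
      const int stack_slot_size = 4;  // one jint per slot (mirrors VMRegImpl)
      const int ymmhi_offset    = 0;  // assumed example; set in save_live_registers
      // YMMHI_STACK_OFFSET(x): byte offset x inside the area -> jint slot index
      auto ymmhi_slot = [&](int x) { return x / stack_slot_size + ymmhi_offset; };
      assert(ymmhi_slot(  0) ==  0);  // xmm0's upper half
      assert(ymmhi_slot( 16) ==  4);  // xmm1: each register is four slots later
      assert(ymmhi_slot(240) == 60);  // xmm15
      return 0;
    }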
@@ -324,28 +312,28 @@
 #if defined(COMPILER2) || defined(JVMCI)
   if (restore_vectors) {
     // Restore upper half of YMM registes.
     assert(UseAVX > 0, "256bit vectors are supported only with AVX");
     assert(MaxVectorSize == 32, "only 256bit vectors are supported now");
-    __ vinsertf128h(xmm0, Address(rsp, ymm_hi0_off * BytesPerInt));
-    __ vinsertf128h(xmm1, Address(rsp, ymm_hi1_off * BytesPerInt));
-    __ vinsertf128h(xmm2, Address(rsp, ymm_hi2_off * BytesPerInt));
-    __ vinsertf128h(xmm3, Address(rsp, ymm_hi3_off * BytesPerInt));
-    __ vinsertf128h(xmm4, Address(rsp, ymm_hi4_off * BytesPerInt));
-    __ vinsertf128h(xmm5, Address(rsp, ymm_hi5_off * BytesPerInt));
-    __ vinsertf128h(xmm6, Address(rsp, ymm_hi6_off * BytesPerInt));
-    __ vinsertf128h(xmm7, Address(rsp, ymm_hi7_off * BytesPerInt));
-    __ vinsertf128h(xmm8, Address(rsp, ymm_hi8_off * BytesPerInt));
-    __ vinsertf128h(xmm9, Address(rsp, ymm_hi9_off * BytesPerInt));
-    __ vinsertf128h(xmm10, Address(rsp, ymm_hi10_off * BytesPerInt));
-    __ vinsertf128h(xmm11, Address(rsp, ymm_hi11_off * BytesPerInt));
-    __ vinsertf128h(xmm12, Address(rsp, ymm_hi12_off * BytesPerInt));
-    __ vinsertf128h(xmm13, Address(rsp, ymm_hi13_off * BytesPerInt));
-    __ vinsertf128h(xmm14, Address(rsp, ymm_hi14_off * BytesPerInt));
-    __ vinsertf128h(xmm15, Address(rsp, ymm_hi15_off * BytesPerInt));
-  }
-  __ addptr(rsp, 256);
+    __ vinsertf128h(xmm0, Address(rsp,  0));
+    __ vinsertf128h(xmm1, Address(rsp, 16));
+    __ vinsertf128h(xmm2, Address(rsp, 32));
+    __ vinsertf128h(xmm3, Address(rsp, 48));
+    __ vinsertf128h(xmm4, Address(rsp, 64));
+    __ vinsertf128h(xmm5, Address(rsp, 80));
+    __ vinsertf128h(xmm6, Address(rsp, 96));
+    __ vinsertf128h(xmm7, Address(rsp,112));
+    __ vinsertf128h(xmm8, Address(rsp,128));
+    __ vinsertf128h(xmm9, Address(rsp,144));
+    __ vinsertf128h(xmm10, Address(rsp,160));
+    __ vinsertf128h(xmm11, Address(rsp,176));
+    __ vinsertf128h(xmm12, Address(rsp,192));
+    __ vinsertf128h(xmm13, Address(rsp,208));
+    __ vinsertf128h(xmm14, Address(rsp,224));
+    __ vinsertf128h(xmm15, Address(rsp,240));
+    __ addptr(rsp, 256);
+  }
 #else
   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
   // Recover CPU state
   __ pop_CPU_state();
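The restore side mirrors the save: the loads use the same literal 0..240 offsets, and the addptr(rsp, 256) moves inside the conditional so the 256 bytes are popped only when the save side pushed them. In loop form, under the same HotSpot-context assumptions as the save-side sketch:

    // Loop-form sketch of the unrolled restores above, not the committed code.
    for (int i = 0; i < 16; i++) {
      __ vinsertf128h(as_XMMRegister(i), Address(rsp, i * 16));
    }
    __ addptr(rsp, 256); // pop the save area reserved in save_live_registers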
@@ -4228,5 +4216,5 @@
 
   // Set exception blob
   _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
 }
 #endif // COMPILER2