Mercurial > hg > truffle
comparison src/cpu/x86/vm/sharedRuntime_x86_64.cpp @ 0:a61af66fc99e jdk7-b24
Initial load
author | duke |
---|---|
date | Sat, 01 Dec 2007 00:00:00 +0000 |
parents | |
children | ba764ed4b6f2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a61af66fc99e |
---|---|
1 /* | |
2 * Copyright 2003-2007 Sun Microsystems, Inc. All Rights Reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
20 * CA 95054 USA or visit www.sun.com if you need additional information or | |
21 * have any questions. | |
22 * | |
23 */ | |
24 | |
25 #include "incls/_precompiled.incl" | |
26 #include "incls/_sharedRuntime_x86_64.cpp.incl" | |
27 | |
// Static storage for the runtime-generated stubs and blobs declared in
// SharedRuntime (and, with C2, OptoRuntime). Definitions only; the blobs
// themselves are produced by the generator routines later in this file.
DeoptimizationBlob *SharedRuntime::_deopt_blob;
#ifdef COMPILER2
UncommonTrapBlob *SharedRuntime::_uncommon_trap_blob;
ExceptionBlob *OptoRuntime::_exception_blob;
#endif // COMPILER2

// Safepoint handling blobs: one for polls taken inside a method, one for
// polls taken at a return.
SafepointBlob *SharedRuntime::_polling_page_safepoint_handler_blob;
SafepointBlob *SharedRuntime::_polling_page_return_handler_blob;
// Call-resolution stubs (wrong-method, inline-cache miss, and the three
// call-site resolvers).
RuntimeStub* SharedRuntime::_wrong_method_blob;
RuntimeStub* SharedRuntime::_ic_miss_blob;
RuntimeStub* SharedRuntime::_resolve_opt_virtual_call_blob;
RuntimeStub* SharedRuntime::_resolve_virtual_call_blob;
RuntimeStub* SharedRuntime::_resolve_static_call_blob;

// Conventional HotSpot shorthand: "__ insn(...)" emits through "masm".
#define __ masm->
43 | |
class SimpleRuntimeFrame {

  public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,  // saved rbp (two jint slots)
    rbp_off2,
    return_off, return_off2,                               // return address (two jint slots)
    framesize                                              // total frame size in jint slots
  };
};
61 | |
// RegisterSaver knows how to push/pop the full CPU state around a call
// site and how to describe the resulting frame to the oopmap machinery.
class RegisterSaver {
  // Capture info about frame layout.  Layout offsets are in jint
  // units because compiler frame slots are jints.
  // DEF_XMM_OFFS(n) defines xmm<n>_off (low half) and xmm<n>H_off (high
  // half); each xmm register occupies 16 bytes in the fxsave area.
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    DEF_XMM_OFFS(2),
    DEF_XMM_OFFS(3),
    DEF_XMM_OFFS(4),
    DEF_XMM_OFFS(5),
    DEF_XMM_OFFS(6),
    DEF_XMM_OFFS(7),
    DEF_XMM_OFFS(8),
    DEF_XMM_OFFS(9),
    DEF_XMM_OFFS(10),
    DEF_XMM_OFFS(11),
    DEF_XMM_OFFS(12),
    DEF_XMM_OFFS(13),
    DEF_XMM_OFFS(14),
    DEF_XMM_OFFS(15),
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    // Integer registers in the order push_CPU_state leaves them on the
    // stack (see MacroAssembler::push/pop_IU_state).
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
  static void restore_live_registers(MacroAssembler* masm);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};
131 | |
132 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { | |
133 | |
134 // Always make the frame size 16-byte aligned | |
135 int frame_size_in_bytes = round_to(additional_frame_words*wordSize + | |
136 reg_save_size*BytesPerInt, 16); | |
137 // OopMap frame size is in compiler stack slots (jint's) not bytes or words | |
138 int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; | |
139 // The caller will allocate additional_frame_words | |
140 int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; | |
141 // CodeBlob frame size is in words. | |
142 int frame_size_in_words = frame_size_in_bytes / wordSize; | |
143 *total_frame_words = frame_size_in_words; | |
144 | |
145 // Save registers, fpu state, and flags. | |
146 // We assume caller has already pushed the return address onto the | |
147 // stack, so rsp is 8-byte aligned here. | |
148 // We push rpb twice in this sequence because we want the real rbp | |
149 // to be under the return like a normal enter. | |
150 | |
151 __ enter(); // rsp becomes 16-byte aligned here | |
152 __ push_CPU_state(); // Push a multiple of 16 bytes | |
153 if (frame::arg_reg_save_area_bytes != 0) { | |
154 // Allocate argument register save area | |
155 __ subq(rsp, frame::arg_reg_save_area_bytes); | |
156 } | |
157 | |
158 // Set an oopmap for the call site. This oopmap will map all | |
159 // oop-registers and debug-info registers as callee-saved. This | |
160 // will allow deoptimization at this safepoint to find all possible | |
161 // debug-info recordings, as well as let GC find all oops. | |
162 | |
163 OopMapSet *oop_maps = new OopMapSet(); | |
164 OopMap* map = new OopMap(frame_size_in_slots, 0); | |
165 map->set_callee_saved(VMRegImpl::stack2reg( rax_off + additional_frame_slots), rax->as_VMReg()); | |
166 map->set_callee_saved(VMRegImpl::stack2reg( rcx_off + additional_frame_slots), rcx->as_VMReg()); | |
167 map->set_callee_saved(VMRegImpl::stack2reg( rdx_off + additional_frame_slots), rdx->as_VMReg()); | |
168 map->set_callee_saved(VMRegImpl::stack2reg( rbx_off + additional_frame_slots), rbx->as_VMReg()); | |
169 // rbp location is known implicitly by the frame sender code, needs no oopmap | |
170 // and the location where rbp was saved by is ignored | |
171 map->set_callee_saved(VMRegImpl::stack2reg( rsi_off + additional_frame_slots), rsi->as_VMReg()); | |
172 map->set_callee_saved(VMRegImpl::stack2reg( rdi_off + additional_frame_slots), rdi->as_VMReg()); | |
173 map->set_callee_saved(VMRegImpl::stack2reg( r8_off + additional_frame_slots), r8->as_VMReg()); | |
174 map->set_callee_saved(VMRegImpl::stack2reg( r9_off + additional_frame_slots), r9->as_VMReg()); | |
175 map->set_callee_saved(VMRegImpl::stack2reg( r10_off + additional_frame_slots), r10->as_VMReg()); | |
176 map->set_callee_saved(VMRegImpl::stack2reg( r11_off + additional_frame_slots), r11->as_VMReg()); | |
177 map->set_callee_saved(VMRegImpl::stack2reg( r12_off + additional_frame_slots), r12->as_VMReg()); | |
178 map->set_callee_saved(VMRegImpl::stack2reg( r13_off + additional_frame_slots), r13->as_VMReg()); | |
179 map->set_callee_saved(VMRegImpl::stack2reg( r14_off + additional_frame_slots), r14->as_VMReg()); | |
180 map->set_callee_saved(VMRegImpl::stack2reg( r15_off + additional_frame_slots), r15->as_VMReg()); | |
181 map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off + additional_frame_slots), xmm0->as_VMReg()); | |
182 map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off + additional_frame_slots), xmm1->as_VMReg()); | |
183 map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off + additional_frame_slots), xmm2->as_VMReg()); | |
184 map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off + additional_frame_slots), xmm3->as_VMReg()); | |
185 map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off + additional_frame_slots), xmm4->as_VMReg()); | |
186 map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off + additional_frame_slots), xmm5->as_VMReg()); | |
187 map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off + additional_frame_slots), xmm6->as_VMReg()); | |
188 map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off + additional_frame_slots), xmm7->as_VMReg()); | |
189 map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off + additional_frame_slots), xmm8->as_VMReg()); | |
190 map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off + additional_frame_slots), xmm9->as_VMReg()); | |
191 map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg()); | |
192 map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg()); | |
193 map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg()); | |
194 map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg()); | |
195 map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg()); | |
196 map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg()); | |
197 | |
198 // %%% These should all be a waste but we'll keep things as they were for now | |
199 if (true) { | |
200 map->set_callee_saved(VMRegImpl::stack2reg( raxH_off + additional_frame_slots), | |
201 rax->as_VMReg()->next()); | |
202 map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off + additional_frame_slots), | |
203 rcx->as_VMReg()->next()); | |
204 map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off + additional_frame_slots), | |
205 rdx->as_VMReg()->next()); | |
206 map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off + additional_frame_slots), | |
207 rbx->as_VMReg()->next()); | |
208 // rbp location is known implicitly by the frame sender code, needs no oopmap | |
209 map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off + additional_frame_slots), | |
210 rsi->as_VMReg()->next()); | |
211 map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off + additional_frame_slots), | |
212 rdi->as_VMReg()->next()); | |
213 map->set_callee_saved(VMRegImpl::stack2reg( r8H_off + additional_frame_slots), | |
214 r8->as_VMReg()->next()); | |
215 map->set_callee_saved(VMRegImpl::stack2reg( r9H_off + additional_frame_slots), | |
216 r9->as_VMReg()->next()); | |
217 map->set_callee_saved(VMRegImpl::stack2reg( r10H_off + additional_frame_slots), | |
218 r10->as_VMReg()->next()); | |
219 map->set_callee_saved(VMRegImpl::stack2reg( r11H_off + additional_frame_slots), | |
220 r11->as_VMReg()->next()); | |
221 map->set_callee_saved(VMRegImpl::stack2reg( r12H_off + additional_frame_slots), | |
222 r12->as_VMReg()->next()); | |
223 map->set_callee_saved(VMRegImpl::stack2reg( r13H_off + additional_frame_slots), | |
224 r13->as_VMReg()->next()); | |
225 map->set_callee_saved(VMRegImpl::stack2reg( r14H_off + additional_frame_slots), | |
226 r14->as_VMReg()->next()); | |
227 map->set_callee_saved(VMRegImpl::stack2reg( r15H_off + additional_frame_slots), | |
228 r15->as_VMReg()->next()); | |
229 map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off + additional_frame_slots), | |
230 xmm0->as_VMReg()->next()); | |
231 map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off + additional_frame_slots), | |
232 xmm1->as_VMReg()->next()); | |
233 map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off + additional_frame_slots), | |
234 xmm2->as_VMReg()->next()); | |
235 map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off + additional_frame_slots), | |
236 xmm3->as_VMReg()->next()); | |
237 map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off + additional_frame_slots), | |
238 xmm4->as_VMReg()->next()); | |
239 map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off + additional_frame_slots), | |
240 xmm5->as_VMReg()->next()); | |
241 map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off + additional_frame_slots), | |
242 xmm6->as_VMReg()->next()); | |
243 map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off + additional_frame_slots), | |
244 xmm7->as_VMReg()->next()); | |
245 map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off + additional_frame_slots), | |
246 xmm8->as_VMReg()->next()); | |
247 map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off + additional_frame_slots), | |
248 xmm9->as_VMReg()->next()); | |
249 map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots), | |
250 xmm10->as_VMReg()->next()); | |
251 map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots), | |
252 xmm11->as_VMReg()->next()); | |
253 map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots), | |
254 xmm12->as_VMReg()->next()); | |
255 map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots), | |
256 xmm13->as_VMReg()->next()); | |
257 map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots), | |
258 xmm14->as_VMReg()->next()); | |
259 map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots), | |
260 xmm15->as_VMReg()->next()); | |
261 } | |
262 | |
263 return map; | |
264 } | |
265 | |
// Exact inverse of save_live_registers: pop the argument register save
// area (if any), the CPU state pushed by push_CPU_state, and finally the
// saved rbp.  Leaves the return address on top of the stack.
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addq(rsp, frame::arg_reg_save_area_bytes);
  }
  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ popq(rbp);
}
276 | |
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result register. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movq(rax, Address(rsp, rax_offset_in_bytes()));
  // Pop all of the register save area off the stack except the return address
  __ addq(rsp, return_offset_in_bytes());
}
292 | |
293 // The java_calling_convention describes stack locations as ideal slots on | |
294 // a frame with no abi restrictions. Since we must observe abi restrictions | |
295 // (like the placement of the register window) the slots must be biased by | |
296 // the following value. | |
297 static int reg2offset_in(VMReg r) { | |
298 // Account for saved rbp and return address | |
299 // This should really be in_preserve_stack_slots | |
300 return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; | |
301 } | |
302 | |
303 static int reg2offset_out(VMReg r) { | |
304 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; | |
305 } | |
306 | |
307 // --------------------------------------------------------------------------- | |
308 // Read the array of BasicTypes from a signature, and compute where the | |
309 // arguments should go. Values in the VMRegPair regs array refer to 4-byte | |
310 // quantities. Values less than VMRegImpl::stack0 are registers, those above | |
311 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer | |
312 // as framesizes are fixed. | |
313 // VMRegImpl::stack0 refers to the first slot 0(sp). | |
314 // and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register | |
315 // up to RegisterImpl::number_of_registers) are the 64-bit | |
316 // integer registers. | |
317 | |
318 // Note: the INPUTS in sig_bt are in units of Java argument words, which are | |
319 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit | |
320 // units regardless of build. Of course for i486 there is no 64 bit build | |
321 | |
322 // The Java calling convention is a "shifted" version of the C ABI. | |
323 // By skipping the first C ABI register we can call non-static jni methods | |
324 // with small numbers of arguments without having to shuffle the arguments | |
325 // at all. Since we control the java ABI we ought to at least get some | |
326 // advantage out of it. | |
327 | |
328 int SharedRuntime::java_calling_convention(const BasicType *sig_bt, | |
329 VMRegPair *regs, | |
330 int total_args_passed, | |
331 int is_outgoing) { | |
332 | |
333 // Create the mapping between argument positions and | |
334 // registers. | |
335 static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { | |
336 j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 | |
337 }; | |
338 static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { | |
339 j_farg0, j_farg1, j_farg2, j_farg3, | |
340 j_farg4, j_farg5, j_farg6, j_farg7 | |
341 }; | |
342 | |
343 | |
344 uint int_args = 0; | |
345 uint fp_args = 0; | |
346 uint stk_args = 0; // inc by 2 each time | |
347 | |
348 for (int i = 0; i < total_args_passed; i++) { | |
349 switch (sig_bt[i]) { | |
350 case T_BOOLEAN: | |
351 case T_CHAR: | |
352 case T_BYTE: | |
353 case T_SHORT: | |
354 case T_INT: | |
355 if (int_args < Argument::n_int_register_parameters_j) { | |
356 regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); | |
357 } else { | |
358 regs[i].set1(VMRegImpl::stack2reg(stk_args)); | |
359 stk_args += 2; | |
360 } | |
361 break; | |
362 case T_VOID: | |
363 // halves of T_LONG or T_DOUBLE | |
364 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); | |
365 regs[i].set_bad(); | |
366 break; | |
367 case T_LONG: | |
368 assert(sig_bt[i + 1] == T_VOID, "expecting half"); | |
369 // fall through | |
370 case T_OBJECT: | |
371 case T_ARRAY: | |
372 case T_ADDRESS: | |
373 if (int_args < Argument::n_int_register_parameters_j) { | |
374 regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); | |
375 } else { | |
376 regs[i].set2(VMRegImpl::stack2reg(stk_args)); | |
377 stk_args += 2; | |
378 } | |
379 break; | |
380 case T_FLOAT: | |
381 if (fp_args < Argument::n_float_register_parameters_j) { | |
382 regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); | |
383 } else { | |
384 regs[i].set1(VMRegImpl::stack2reg(stk_args)); | |
385 stk_args += 2; | |
386 } | |
387 break; | |
388 case T_DOUBLE: | |
389 assert(sig_bt[i + 1] == T_VOID, "expecting half"); | |
390 if (fp_args < Argument::n_float_register_parameters_j) { | |
391 regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); | |
392 } else { | |
393 regs[i].set2(VMRegImpl::stack2reg(stk_args)); | |
394 stk_args += 2; | |
395 } | |
396 break; | |
397 default: | |
398 ShouldNotReachHere(); | |
399 break; | |
400 } | |
401 } | |
402 | |
403 return round_to(stk_args, 2); | |
404 } | |
405 | |
406 // Patch the callers callsite with entry to compiled code if it exists. | |
// Patch the callers callsite with entry to compiled code if it exists.
// On entry rbx holds the methodOop and the return address of the caller
// is at 0(rsp).  If the method has no compiled code this is a no-op;
// otherwise the full CPU state is saved and fixup_callers_callsite is
// called to rewrite the caller's call instruction.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ verify_oop(rbx);
  // Skip everything if the method has no compiled entry yet.
  __ cmpq(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int)NULL_WORD);
  __ jcc(Assembler::equal, L);

  // Save the current stack pointer
  __ movq(r13, rsp);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee
  // rax isn't live so capture return address while we easily can
  __ movq(rax, Address(rsp, 0));

  // align stack so push_CPU_state doesn't fault
  __ andq(rsp, -(StackAlignmentInBytes));
  __ push_CPU_state();


  __ verify_oop(rbx);
  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

  // Allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ subq(rsp, frame::arg_reg_save_area_bytes);
  }
  // c_rarg0 = target method, c_rarg1 = caller's return address.
  __ movq(c_rarg0, rbx);
  __ movq(c_rarg1, rax);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));

  // De-allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ addq(rsp, frame::arg_reg_save_area_bytes);
  }

  __ pop_CPU_state();
  // restore sp
  __ movq(rsp, r13);
  __ bind(L);
}
449 | |
450 // Helper function to put tags in interpreter stack. | |
451 static void tag_stack(MacroAssembler *masm, const BasicType sig, int st_off) { | |
452 if (TaggedStackInterpreter) { | |
453 int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0); | |
454 if (sig == T_OBJECT || sig == T_ARRAY) { | |
455 __ mov64(Address(rsp, tag_offset), frame::TagReference); | |
456 } else if (sig == T_LONG || sig == T_DOUBLE) { | |
457 int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1); | |
458 __ mov64(Address(rsp, next_tag_offset), frame::TagValue); | |
459 __ mov64(Address(rsp, tag_offset), frame::TagValue); | |
460 } else { | |
461 __ mov64(Address(rsp, tag_offset), frame::TagValue); | |
462 } | |
463 } | |
464 } | |
465 | |
466 | |
// Generate the compiled-to-interpreted adapter: take arguments laid out
// per the compiled calling convention (regs) and store them into the
// interpreter's expected stack layout, then jump to the method's
// interpreter entry.  rbx holds the methodOop throughout.
static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need. Plus 1 because
  // we also account for the return address location since
  // we store it first rather than hold it in rax across all the shuffling

  int extraspace = (total_args_passed * Interpreter::stackElementSize()) + wordSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  __ popq(rax);

  // set senderSP value
  __ movq(r13, rsp);

  __ subq(rsp, extraspace);

  // Store the return address in the expected location
  __ movq(Address(rsp, 0), rax);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // offset to start parameters
    int st_off   = (total_args_passed - i) * Interpreter::stackElementSize() +
                   Interpreter::value_offset_in_bytes();
    int next_off = st_off - Interpreter::stackElementSize();

    // Say 4 args:
    // i   st_off
    // 0   32 T_LONG
    // 1   24 T_VOID
    // 2   16 T_OBJECT
    // 3    8 T_BOOL
    // -    0 return address
    //
    // However to make thing extra confusing. Because we can fit a long/double in
    // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter
    // leaves one slot empty and only stores to a single slot. In this case the
    // slot that is occupied is the T_VOID slot. See I said it was confusing.

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      // Argument occupies no location (e.g. a skipped half); nothing to copy.
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory use rax
      // Source slot is in the caller's outgoing area, above the space we
      // just allocated (hence the + extraspace bias).
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        // sign extend??
        __ movl(rax, Address(rsp, ld_off));
        __ movq(Address(rsp, st_off), rax);
        tag_stack(masm, sig_bt[i], st_off);

      } else {

        __ movq(rax, Address(rsp, ld_off));

        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // ld_off == LSW, ld_off+wordSize == MSW
          // st_off == MSW, next_off == LSW
          __ movq(Address(rsp, next_off), rax);
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
          __ movq(Address(rsp, st_off), rax);
#endif /* ASSERT */
          tag_stack(masm, sig_bt[i], next_off);
        } else {
          __ movq(Address(rsp, st_off), rax);
          tag_stack(masm, sig_bt[i], st_off);
        }
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        // must be only an int (or less ) so move only 32bits to slot
        // why not sign extend??
        __ movl(Address(rsp, st_off), r);
        tag_stack(masm, sig_bt[i], st_off);
      } else {
        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // long/double in gpr
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaab));
          __ movq(Address(rsp, st_off), rax);
#endif /* ASSERT */
          __ movq(Address(rsp, next_off), r);
          tag_stack(masm, sig_bt[i], next_off);
        } else {
          __ movq(Address(rsp, st_off), r);
          tag_stack(masm, sig_bt[i], st_off);
        }
      }
    } else {
      assert(r_1->is_XMMRegister(), "");
      if (!r_2->is_valid()) {
        // only a float use just part of the slot
        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
        tag_stack(masm, sig_bt[i], st_off);
      } else {
#ifdef ASSERT
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaac));
        __ movq(Address(rsp, st_off), rax);
#endif /* ASSERT */
        __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
        tag_stack(masm, sig_bt[i], next_off);
      }
    }
  }

  // Schedule the branch target address early.
  __ movq(rcx, Address(rbx, in_bytes(methodOopDesc::interpreter_entry_offset())));
  __ jmp(rcx);
}
610 | |
611 static void gen_i2c_adapter(MacroAssembler *masm, | |
612 int total_args_passed, | |
613 int comp_args_on_stack, | |
614 const BasicType *sig_bt, | |
615 const VMRegPair *regs) { | |
616 | |
617 // | |
618 // We will only enter here from an interpreted frame and never from after | |
619 // passing thru a c2i. Azul allowed this but we do not. If we lose the | |
620 // race and use a c2i we will remain interpreted for the race loser(s). | |
621 // This removes all sorts of headaches on the x86 side and also eliminates | |
622 // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. | |
623 | |
624 | |
625 // Note: r13 contains the senderSP on entry. We must preserve it since | |
626 // we may do a i2c -> c2i transition if we lose a race where compiled | |
627 // code goes non-entrant while we get args ready. | |
628 // In addition we use r13 to locate all the interpreter args as | |
629 // we must align the stack to 16 bytes on an i2c entry else we | |
630 // lose alignment we expect in all compiled code and register | |
631 // save code can segv when fxsave instructions find improperly | |
632 // aligned stack pointer. | |
633 | |
634 __ movq(rax, Address(rsp, 0)); | |
635 | |
636 // Cut-out for having no stack args. Since up to 2 int/oop args are passed | |
637 // in registers, we will occasionally have no stack args. | |
638 int comp_words_on_stack = 0; | |
639 if (comp_args_on_stack) { | |
640 // Sig words on the stack are greater-than VMRegImpl::stack0. Those in | |
641 // registers are below. By subtracting stack0, we either get a negative | |
642 // number (all values in registers) or the maximum stack slot accessed. | |
643 | |
644 // Convert 4-byte c2 stack slots to words. | |
645 comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; | |
646 // Round up to miminum stack alignment, in wordSize | |
647 comp_words_on_stack = round_to(comp_words_on_stack, 2); | |
648 __ subq(rsp, comp_words_on_stack * wordSize); | |
649 } | |
650 | |
651 | |
652 // Ensure compiled code always sees stack at proper alignment | |
653 __ andq(rsp, -16); | |
654 | |
655 // push the return address and misalign the stack that youngest frame always sees | |
656 // as far as the placement of the call instruction | |
657 __ pushq(rax); | |
658 | |
659 // Will jump to the compiled code just as if compiled code was doing it. | |
660 // Pre-load the register-jump target early, to schedule it better. | |
661 __ movq(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset()))); | |
662 | |
663 // Now generate the shuffle code. Pick up all register args and move the | |
664 // rest through the floating point stack top. | |
665 for (int i = 0; i < total_args_passed; i++) { | |
666 if (sig_bt[i] == T_VOID) { | |
667 // Longs and doubles are passed in native word order, but misaligned | |
668 // in the 32-bit build. | |
669 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); | |
670 continue; | |
671 } | |
672 | |
673 // Pick up 0, 1 or 2 words from SP+offset. | |
674 | |
675 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), | |
676 "scrambled load targets?"); | |
677 // Load in argument order going down. | |
678 // int ld_off = (total_args_passed + comp_words_on_stack -i)*wordSize; | |
679 // base ld_off on r13 (sender_sp) as the stack alignment makes offsets from rsp | |
680 // unpredictable | |
681 int ld_off = ((total_args_passed - 1) - i)*Interpreter::stackElementSize(); | |
682 | |
683 // Point to interpreter value (vs. tag) | |
684 int next_off = ld_off - Interpreter::stackElementSize(); | |
685 // | |
686 // | |
687 // | |
688 VMReg r_1 = regs[i].first(); | |
689 VMReg r_2 = regs[i].second(); | |
690 if (!r_1->is_valid()) { | |
691 assert(!r_2->is_valid(), ""); | |
692 continue; | |
693 } | |
694 if (r_1->is_stack()) { | |
695 // Convert stack slot to an SP offset (+ wordSize to account for return address ) | |
696 int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; | |
697 if (!r_2->is_valid()) { | |
698 // sign extend??? | |
699 __ movl(rax, Address(r13, ld_off)); | |
700 __ movq(Address(rsp, st_off), rax); | |
701 } else { | |
702 // | |
703 // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE | |
704 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case | |
705 // So we must adjust where to pick up the data to match the interpreter. | |
706 // | |
707 // Interpreter local[n] == MSW, local[n+1] == LSW however locals | |
708 // are accessed as negative so LSW is at LOW address | |
709 | |
710 // ld_off is MSW so get LSW | |
711 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? | |
712 next_off : ld_off; | |
713 __ movq(rax, Address(r13, offset)); | |
714 // st_off is LSW (i.e. reg.first()) | |
715 __ movq(Address(rsp, st_off), rax); | |
716 } | |
717 } else if (r_1->is_Register()) { // Register argument | |
718 Register r = r_1->as_Register(); | |
719 assert(r != rax, "must be different"); | |
720 if (r_2->is_valid()) { | |
721 // | |
722 // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE | |
723 // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case | |
724 // So we must adjust where to pick up the data to match the interpreter. | |
725 | |
726 const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? | |
727 next_off : ld_off; | |
728 | |
729 // this can be a misaligned move | |
730 __ movq(r, Address(r13, offset)); | |
731 } else { | |
732 // sign extend and use a full word? | |
733 __ movl(r, Address(r13, ld_off)); | |
734 } | |
735 } else { | |
736 if (!r_2->is_valid()) { | |
737 __ movflt(r_1->as_XMMRegister(), Address(r13, ld_off)); | |
738 } else { | |
739 __ movdbl(r_1->as_XMMRegister(), Address(r13, next_off)); | |
740 } | |
741 } | |
742 } | |
743 | |
744 // 6243940 We might end up in handle_wrong_method if | |
745 // the callee is deoptimized as we race thru here. If that | |
746 // happens we don't want to take a safepoint because the | |
747 // caller frame will look interpreted and arguments are now | |
748 // "compiled" so it is much better to make this transition | |
749 // invisible to the stack walking code. Unfortunately if | |
750 // we try and find the callee by normal means a safepoint | |
751 // is possible. So we stash the desired callee in the thread | |
752 // and the vm will find there should this case occur. | |
753 | |
754 __ movq(Address(r15_thread, JavaThread::callee_target_offset()), rbx); | |
755 | |
756 // put methodOop where a c2i would expect should we end up there | |
757 // only needed becaus eof c2 resolve stubs return methodOop as a result in | |
758 // rax | |
759 __ movq(rax, rbx); | |
760 __ jmp(r11); | |
761 } | |
762 | |
// ---------------------------------------------------------------
// Build one adapter blob with three entry points:
//   i2c_entry            - shuffle interpreter-laid-out args into the
//                          compiled (register-based) convention
//   c2i_unverified_entry - like c2i_entry but preceded by an inline-cache
//                          check (used when called from a compiled virtual
//                          call site that has not been verified yet)
//   c2i_entry            - shuffle compiled args into the interpreter layout
// On entry rbx holds the methodOop; j_rarg0 holds the receiver.
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs) {
  address i2c_entry = __ pc();

  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know rbx holds the methodOop during calls
  // to the interpreter. The args start out packed in the compiled layout. They
  // need to be unpacked into the interpreter layout. This will almost always
  // require some stack space. We grow the current (compiled) stack, then repack
  // the args. We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relys solely on SP and not RBP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  Label ok;

  Register holder = rax;      // compiledICHolderOop from the call site
  Register receiver = j_rarg0;
  Register temp = rbx;        // scratch; reloaded with the methodOop below

  {
    // Inline-cache check: the receiver's klass must match the klass cached
    // in the compiledICHolder, otherwise take the IC-miss stub.
    __ verify_oop(holder);
    __ movq(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
    __ verify_oop(temp);

    __ cmpq(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
    // Load the target methodOop into rbx before the branch so both the hit
    // and miss paths see it where they expect it.
    __ movq(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
    __ jcc(Assembler::equal, ok);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ cmpq(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int)NULL_WORD);
    __ jcc(Assembler::equal, skip_fixup);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  }

  address c2i_entry = __ pc();

  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
}
816 | |
// Assign each java-signature element a location (register or stack slot)
// under the native C calling convention for this platform.
// Returns the number of VMRegImpl stack slots (4 bytes each) needed for the
// outgoing arguments, NOT counting out_preserve_stack_slots.  Every stack
// argument consumes 2 slots (one 8-byte word); stk_args therefore always
// advances by 2.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        int total_args_passed) {
  // We return the amount of VMRegImpl stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots.

  // NOTE: These arrays will have to change when c1 is ported
#ifdef _WIN64
  // Win64: 4 shared int/fp argument registers; position n uses either the
  // nth GPR or the nth XMM, never both.
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3
  };
#else
  // System V AMD64: 6 integer and 8 floating-point argument registers,
  // assigned independently.
  static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
    c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
    c_farg0, c_farg1, c_farg2, c_farg3,
    c_farg4, c_farg5, c_farg6, c_farg7
  };
#endif // _WIN64


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        // On Win64 int and fp register positions are shared, so burn the
        // matching fp slot and reserve home (shadow) space on the stack.
        fp_args++;
        // Allocate slots for callee to stuff register args the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (int_args < Argument::n_int_register_parameters_c) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
        fp_args++;
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for callee to stuff register args the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_c) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
        int_args++;
        // Allocate slots for callee to stuff register args the stack.
        stk_args += 2;
#endif
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID: // Halves of longs and doubles
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }
#ifdef _WIN64
  // windows abi requires that we always allocate enough stack space
  // for 4 64bit registers to be stored down.
  if (stk_args < 8) {
    stk_args = 8;
  }
#endif // _WIN64

  return stk_args;
}
928 | |
// On 64 bit we will store integer like items to the stack as
// 64 bits items (sparc abi) even though java would only store
// 32bits for a parameter. On 32bit it will simply be 32 bits
// So this routine will do 32->32 on 32bit and 32->64 on 64bit
//
// src is addressed relative to the caller frame (rbp); dst relative to the
// current outgoing area (rsp).  rax is used as scratch for the
// memory-to-memory case.
static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      // movslq sign-extends the 32-bit java value into a full 64-bit slot.
      __ movslq(rax, Address(rbp, reg2offset_in(src.first())));
      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
    } else {
      // stack to reg
      __ movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    // Do we really have to sign extend???
    // __ movslq(src.first()->as_Register(), src.first()->as_Register());
    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
  } else {
    // Do we really have to sign extend???
    // __ movslq(dst.first()->as_Register(), src.first()->as_Register());
    if (dst.first() != src.first()) {
      __ movq(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
956 | |
957 | |
// An oop arg. Must pass a handle not the oop itself.
// Records the oop's stack location in the oopMap, stores the oop into the
// handle area (register args) or leaves it in the caller frame (stack args),
// and passes the address of that slot -- or NULL if the oop is NULL -- as
// the native argument.  rax is used as scratch when dst is a stack slot.
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

  Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register();

  // See if oop is NULL if it is we need no handle

  if (src.first()->is_stack()) {

    // Oop is already on the stack as an argument
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    // Slot is expressed relative to this frame's SP, hence + framesize.
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    __ cmpq(Address(rbp, reg2offset_in(src.first())), (int)NULL_WORD);
    __ leaq(rHandle, Address(rbp, reg2offset_in(src.first())));
    // conditionally move a NULL
    __ cmovq(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first())));
  } else {

    // Oop is in an a register we must store it to the space we reserve
    // on the stack for oop_handles and pass a handle if oop is non-NULL

    // Each java arg register gets a fixed slot in the handle area so a
    // later arg's move can never clobber an earlier arg's handle.
    const Register rOop = src.first()->as_Register();
    int oop_slot;
    if (rOop == j_rarg0)
      oop_slot = 0;
    else if (rOop == j_rarg1)
      oop_slot = 1;
    else if (rOop == j_rarg2)
      oop_slot = 2;
    else if (rOop == j_rarg3)
      oop_slot = 3;
    else if (rOop == j_rarg4)
      oop_slot = 4;
    else {
      assert(rOop == j_rarg5, "wrong register");
      oop_slot = 5;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot*VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store oop in handle area, may be NULL
    __ movq(Address(rsp, offset), rOop);
    if (is_receiver) {
      *receiver_offset = offset;
    }

    __ cmpq(rOop, (int)NULL);
    __ leaq(rHandle, Address(rsp, offset));
    // conditionally move a NULL from the handle area where it was just stored
    __ cmovq(Assembler::equal, rHandle, Address(rsp, offset));
  }

  // If arg is on the stack then place it otherwise it is already in correct reg.
  if (dst.first()->is_stack()) {
    __ movq(Address(rsp, reg2offset_out(dst.first())), rHandle);
  }
}
1030 | |
// A float arg may have to do float reg int reg conversion.
// Moves a single 32-bit float between caller-frame (rbp-relative) stack,
// outgoing (rsp-relative) stack, and xmm registers.  rax is scratch for the
// memory-to-memory case.
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");

  // The calling conventions assures us that each VMregpair is either
  // all really one physical register or adjacent stack slots.
  // This greatly simplifies the cases here compared to sparc.

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // 32-bit load is enough; the upper half of the slot is don't-care.
      __ movl(rax, Address(rbp, reg2offset_in(src.first())));
      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
    } else {
      // stack to reg
      assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters");
      __ movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters");
    __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  } else {
    // reg to reg
    // In theory these overlap but the ordering is such that this is likely a nop
    // NOTE(review): movdbl copies the low 64 bits of the xmm register even
    // though only a float is live; harmless for reg-to-reg but movflt would
    // state the intent more precisely.
    if ( src.first() != dst.first()) {
      __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
    }
  }
}
1060 | |
1061 // A long move | |
1062 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { | |
1063 | |
1064 // The calling conventions assures us that each VMregpair is either | |
1065 // all really one physical register or adjacent stack slots. | |
1066 // This greatly simplifies the cases here compared to sparc. | |
1067 | |
1068 if (src.is_single_phys_reg() ) { | |
1069 if (dst.is_single_phys_reg()) { | |
1070 if (dst.first() != src.first()) { | |
1071 __ movq(dst.first()->as_Register(), src.first()->as_Register()); | |
1072 } | |
1073 } else { | |
1074 assert(dst.is_single_reg(), "not a stack pair"); | |
1075 __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); | |
1076 } | |
1077 } else if (dst.is_single_phys_reg()) { | |
1078 assert(src.is_single_reg(), "not a stack pair"); | |
1079 __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first()))); | |
1080 } else { | |
1081 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); | |
1082 __ movq(rax, Address(rbp, reg2offset_in(src.first()))); | |
1083 __ movq(Address(rsp, reg2offset_out(dst.first())), rax); | |
1084 } | |
1085 } | |
1086 | |
1087 // A double move | |
1088 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { | |
1089 | |
1090 // The calling conventions assures us that each VMregpair is either | |
1091 // all really one physical register or adjacent stack slots. | |
1092 // This greatly simplifies the cases here compared to sparc. | |
1093 | |
1094 if (src.is_single_phys_reg() ) { | |
1095 if (dst.is_single_phys_reg()) { | |
1096 // In theory these overlap but the ordering is such that this is likely a nop | |
1097 if ( src.first() != dst.first()) { | |
1098 __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister()); | |
1099 } | |
1100 } else { | |
1101 assert(dst.is_single_reg(), "not a stack pair"); | |
1102 __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); | |
1103 } | |
1104 } else if (dst.is_single_phys_reg()) { | |
1105 assert(src.is_single_reg(), "not a stack pair"); | |
1106 __ movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first()))); | |
1107 } else { | |
1108 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); | |
1109 __ movq(rax, Address(rbp, reg2offset_in(src.first()))); | |
1110 __ movq(Address(rsp, reg2offset_out(dst.first())), rax); | |
1111 } | |
1112 } | |
1113 | |
1114 | |
1115 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { | |
1116 // We always ignore the frame_slots arg and just use the space just below frame pointer | |
1117 // which by this time is free to use | |
1118 switch (ret_type) { | |
1119 case T_FLOAT: | |
1120 __ movflt(Address(rbp, -wordSize), xmm0); | |
1121 break; | |
1122 case T_DOUBLE: | |
1123 __ movdbl(Address(rbp, -wordSize), xmm0); | |
1124 break; | |
1125 case T_VOID: break; | |
1126 default: { | |
1127 __ movq(Address(rbp, -wordSize), rax); | |
1128 } | |
1129 } | |
1130 } | |
1131 | |
1132 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { | |
1133 // We always ignore the frame_slots arg and just use the space just below frame pointer | |
1134 // which by this time is free to use | |
1135 switch (ret_type) { | |
1136 case T_FLOAT: | |
1137 __ movflt(xmm0, Address(rbp, -wordSize)); | |
1138 break; | |
1139 case T_DOUBLE: | |
1140 __ movdbl(xmm0, Address(rbp, -wordSize)); | |
1141 break; | |
1142 case T_VOID: break; | |
1143 default: { | |
1144 __ movq(rax, Address(rbp, -wordSize)); | |
1145 } | |
1146 } | |
1147 } | |
1148 | |
1149 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { | |
1150 for ( int i = first_arg ; i < arg_count ; i++ ) { | |
1151 if (args[i].first()->is_Register()) { | |
1152 __ pushq(args[i].first()->as_Register()); | |
1153 } else if (args[i].first()->is_XMMRegister()) { | |
1154 __ subq(rsp, 2*wordSize); | |
1155 __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister()); | |
1156 } | |
1157 } | |
1158 } | |
1159 | |
1160 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { | |
1161 for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { | |
1162 if (args[i].first()->is_Register()) { | |
1163 __ popq(args[i].first()->as_Register()); | |
1164 } else if (args[i].first()->is_XMMRegister()) { | |
1165 __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0)); | |
1166 __ addq(rsp, 2*wordSize); | |
1167 } | |
1168 } | |
1169 } | |
1170 | |
1171 // --------------------------------------------------------------------------- | |
1172 // Generate a native wrapper for a given method. The method takes arguments | |
1173 // in the Java compiled code convention, marshals them to the native | |
1174 // convention (handlizes oops, etc), transitions to native, makes the call, | |
1175 // returns to java state (possibly blocking), unhandlizes any result and | |
1176 // returns. | |
1177 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, | |
1178 methodHandle method, | |
1179 int total_in_args, | |
1180 int comp_args_on_stack, | |
1181 BasicType *in_sig_bt, | |
1182 VMRegPair *in_regs, | |
1183 BasicType ret_type) { | |
1184 // Native nmethod wrappers never take possesion of the oop arguments. | |
1185 // So the caller will gc the arguments. The only thing we need an | |
1186 // oopMap for is if the call is static | |
1187 // | |
1188 // An OopMap for lock (and class if static) | |
1189 OopMapSet *oop_maps = new OopMapSet(); | |
1190 intptr_t start = (intptr_t)__ pc(); | |
1191 | |
1192 // We have received a description of where all the java arg are located | |
1193 // on entry to the wrapper. We need to convert these args to where | |
1194 // the jni function will expect them. To figure out where they go | |
1195 // we convert the java signature to a C signature by inserting | |
1196 // the hidden arguments as arg[0] and possibly arg[1] (static method) | |
1197 | |
1198 int total_c_args = total_in_args + 1; | |
1199 if (method->is_static()) { | |
1200 total_c_args++; | |
1201 } | |
1202 | |
1203 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); | |
1204 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); | |
1205 | |
1206 int argc = 0; | |
1207 out_sig_bt[argc++] = T_ADDRESS; | |
1208 if (method->is_static()) { | |
1209 out_sig_bt[argc++] = T_OBJECT; | |
1210 } | |
1211 | |
1212 for (int i = 0; i < total_in_args ; i++ ) { | |
1213 out_sig_bt[argc++] = in_sig_bt[i]; | |
1214 } | |
1215 | |
1216 // Now figure out where the args must be stored and how much stack space | |
1217 // they require. | |
1218 // | |
1219 int out_arg_slots; | |
1220 out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); | |
1221 | |
1222 // Compute framesize for the wrapper. We need to handlize all oops in | |
1223 // incoming registers | |
1224 | |
1225 // Calculate the total number of stack slots we will need. | |
1226 | |
1227 // First count the abi requirement plus all of the outgoing args | |
1228 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; | |
1229 | |
1230 // Now the space for the inbound oop handle area | |
1231 | |
1232 int oop_handle_offset = stack_slots; | |
1233 stack_slots += 6*VMRegImpl::slots_per_word; | |
1234 | |
1235 // Now any space we need for handlizing a klass if static method | |
1236 | |
1237 int oop_temp_slot_offset = 0; | |
1238 int klass_slot_offset = 0; | |
1239 int klass_offset = -1; | |
1240 int lock_slot_offset = 0; | |
1241 bool is_static = false; | |
1242 | |
1243 if (method->is_static()) { | |
1244 klass_slot_offset = stack_slots; | |
1245 stack_slots += VMRegImpl::slots_per_word; | |
1246 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; | |
1247 is_static = true; | |
1248 } | |
1249 | |
1250 // Plus a lock if needed | |
1251 | |
1252 if (method->is_synchronized()) { | |
1253 lock_slot_offset = stack_slots; | |
1254 stack_slots += VMRegImpl::slots_per_word; | |
1255 } | |
1256 | |
1257 // Now a place (+2) to save return values or temp during shuffling | |
1258 // + 4 for return address (which we own) and saved rbp | |
1259 stack_slots += 6; | |
1260 | |
1261 // Ok The space we have allocated will look like: | |
1262 // | |
1263 // | |
1264 // FP-> | | | |
1265 // |---------------------| | |
1266 // | 2 slots for moves | | |
1267 // |---------------------| | |
1268 // | lock box (if sync) | | |
1269 // |---------------------| <- lock_slot_offset | |
1270 // | klass (if static) | | |
1271 // |---------------------| <- klass_slot_offset | |
1272 // | oopHandle area | | |
1273 // |---------------------| <- oop_handle_offset (6 java arg registers) | |
1274 // | outbound memory | | |
1275 // | based arguments | | |
1276 // | | | |
1277 // |---------------------| | |
1278 // | | | |
1279 // SP-> | out_preserved_slots | | |
1280 // | |
1281 // | |
1282 | |
1283 | |
1284 // Now compute actual number of stack words we need rounding to make | |
1285 // stack properly aligned. | |
1286 stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); | |
1287 | |
1288 int stack_size = stack_slots * VMRegImpl::stack_slot_size; | |
1289 | |
1290 | |
1291 // First thing make an ic check to see if we should even be here | |
1292 | |
1293 // We are free to use all registers as temps without saving them and | |
1294 // restoring them except rbp. rbp is the only callee save register | |
1295 // as far as the interpreter and the compiler(s) are concerned. | |
1296 | |
1297 | |
1298 const Register ic_reg = rax; | |
1299 const Register receiver = j_rarg0; | |
1300 | |
1301 Label ok; | |
1302 Label exception_pending; | |
1303 | |
1304 __ verify_oop(receiver); | |
1305 __ cmpq(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes())); | |
1306 __ jcc(Assembler::equal, ok); | |
1307 | |
1308 __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); | |
1309 | |
1310 // Verified entry point must be aligned | |
1311 __ align(8); | |
1312 | |
1313 __ bind(ok); | |
1314 | |
1315 int vep_offset = ((intptr_t)__ pc()) - start; | |
1316 | |
1317 // The instruction at the verified entry point must be 5 bytes or longer | |
1318 // because it can be patched on the fly by make_non_entrant. The stack bang | |
1319 // instruction fits that requirement. | |
1320 | |
1321 // Generate stack overflow check | |
1322 | |
1323 if (UseStackBanging) { | |
1324 __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); | |
1325 } else { | |
1326 // need a 5 byte instruction to allow MT safe patching to non-entrant | |
1327 __ fat_nop(); | |
1328 } | |
1329 | |
1330 // Generate a new frame for the wrapper. | |
1331 __ enter(); | |
1332 // -2 because return address is already present and so is saved rbp | |
1333 __ subq(rsp, stack_size - 2*wordSize); | |
1334 | |
1335 // Frame is now completed as far as size and linkage. | |
1336 | |
1337 int frame_complete = ((intptr_t)__ pc()) - start; | |
1338 | |
1339 #ifdef ASSERT | |
1340 { | |
1341 Label L; | |
1342 __ movq(rax, rsp); | |
1343 __ andq(rax, -16); // must be 16 byte boundry (see amd64 ABI) | |
1344 __ cmpq(rax, rsp); | |
1345 __ jcc(Assembler::equal, L); | |
1346 __ stop("improperly aligned stack"); | |
1347 __ bind(L); | |
1348 } | |
1349 #endif /* ASSERT */ | |
1350 | |
1351 | |
1352 // We use r14 as the oop handle for the receiver/klass | |
1353 // It is callee save so it survives the call to native | |
1354 | |
1355 const Register oop_handle_reg = r14; | |
1356 | |
1357 | |
1358 | |
1359 // | |
1360 // We immediately shuffle the arguments so that any vm call we have to | |
1361 // make from here on out (sync slow path, jvmti, etc.) we will have | |
1362 // captured the oops from our caller and have a valid oopMap for | |
1363 // them. | |
1364 | |
1365 // ----------------- | |
1366 // The Grand Shuffle | |
1367 | |
1368 // The Java calling convention is either equal (linux) or denser (win64) than the | |
1369 // c calling convention. However the because of the jni_env argument the c calling | |
1370 // convention always has at least one more (and two for static) arguments than Java. | |
1371 // Therefore if we move the args from java -> c backwards then we will never have | |
1372 // a register->register conflict and we don't have to build a dependency graph | |
1373 // and figure out how to break any cycles. | |
1374 // | |
1375 | |
1376 // Record esp-based slot for receiver on stack for non-static methods | |
1377 int receiver_offset = -1; | |
1378 | |
1379 // This is a trick. We double the stack slots so we can claim | |
1380 // the oops in the caller's frame. Since we are sure to have | |
1381 // more args than the caller doubling is enough to make | |
1382 // sure we can capture all the incoming oop args from the | |
1383 // caller. | |
1384 // | |
1385 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); | |
1386 | |
1387 // Mark location of rbp (someday) | |
1388 // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); | |
1389 | |
1390 // Use eax, ebx as temporaries during any memory-memory moves we have to do | |
1391 // All inbound args are referenced based on rbp and all outbound args via rsp. | |
1392 | |
1393 | |
1394 #ifdef ASSERT | |
1395 bool reg_destroyed[RegisterImpl::number_of_registers]; | |
1396 bool freg_destroyed[XMMRegisterImpl::number_of_registers]; | |
1397 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { | |
1398 reg_destroyed[r] = false; | |
1399 } | |
1400 for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) { | |
1401 freg_destroyed[f] = false; | |
1402 } | |
1403 | |
1404 #endif /* ASSERT */ | |
1405 | |
1406 | |
1407 int c_arg = total_c_args - 1; | |
1408 for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) { | |
1409 #ifdef ASSERT | |
1410 if (in_regs[i].first()->is_Register()) { | |
1411 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); | |
1412 } else if (in_regs[i].first()->is_XMMRegister()) { | |
1413 assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!"); | |
1414 } | |
1415 if (out_regs[c_arg].first()->is_Register()) { | |
1416 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; | |
1417 } else if (out_regs[c_arg].first()->is_XMMRegister()) { | |
1418 freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; | |
1419 } | |
1420 #endif /* ASSERT */ | |
1421 switch (in_sig_bt[i]) { | |
1422 case T_ARRAY: | |
1423 case T_OBJECT: | |
1424 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], | |
1425 ((i == 0) && (!is_static)), | |
1426 &receiver_offset); | |
1427 break; | |
1428 case T_VOID: | |
1429 break; | |
1430 | |
1431 case T_FLOAT: | |
1432 float_move(masm, in_regs[i], out_regs[c_arg]); | |
1433 break; | |
1434 | |
1435 case T_DOUBLE: | |
1436 assert( i + 1 < total_in_args && | |
1437 in_sig_bt[i + 1] == T_VOID && | |
1438 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); | |
1439 double_move(masm, in_regs[i], out_regs[c_arg]); | |
1440 break; | |
1441 | |
1442 case T_LONG : | |
1443 long_move(masm, in_regs[i], out_regs[c_arg]); | |
1444 break; | |
1445 | |
1446 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); | |
1447 | |
1448 default: | |
1449 move32_64(masm, in_regs[i], out_regs[c_arg]); | |
1450 } | |
1451 } | |
1452 | |
1453 // point c_arg at the first arg that is already loaded in case we | |
1454 // need to spill before we call out | |
1455 c_arg++; | |
1456 | |
1457 // Pre-load a static method's oop into r14. Used both by locking code and | |
1458 // the normal JNI call code. | |
1459 if (method->is_static()) { | |
1460 | |
1461 // load oop into a register | |
1462 __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror())); | |
1463 | |
1464 // Now handlize the static class mirror it's known not-null. | |
1465 __ movq(Address(rsp, klass_offset), oop_handle_reg); | |
1466 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); | |
1467 | |
1468 // Now get the handle | |
1469 __ leaq(oop_handle_reg, Address(rsp, klass_offset)); | |
1470 // store the klass handle as second argument | |
1471 __ movq(c_rarg1, oop_handle_reg); | |
1472 // and protect the arg if we must spill | |
1473 c_arg--; | |
1474 } | |
1475 | |
1476 // Change state to native (we save the return address in the thread, since it might not | |
1477 // be pushed on the stack when we do a a stack traversal). It is enough that the pc() | |
1478 // points into the right code segment. It does not have to be the correct return pc. | |
1479 // We use the same pc/oopMap repeatedly when we call out | |
1480 | |
1481 intptr_t the_pc = (intptr_t) __ pc(); | |
1482 oop_maps->add_gc_map(the_pc - start, map); | |
1483 | |
1484 __ set_last_Java_frame(rsp, noreg, (address)the_pc); | |
1485 | |
1486 | |
1487 // We have all of the arguments setup at this point. We must not touch any register | |
1488 // argument registers at this point (what if we save/restore them there are no oop? | |
1489 | |
1490 { | |
1491 SkipIfEqual skip(masm, &DTraceMethodProbes, false); | |
1492 // protect the args we've loaded | |
1493 save_args(masm, total_c_args, c_arg, out_regs); | |
1494 __ movoop(c_rarg1, JNIHandles::make_local(method())); | |
1495 __ call_VM_leaf( | |
1496 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), | |
1497 r15_thread, c_rarg1); | |
1498 restore_args(masm, total_c_args, c_arg, out_regs); | |
1499 } | |
1500 | |
1501 // Lock a synchronized method | |
1502 | |
1503 // Register definitions used by locking and unlocking | |
1504 | |
1505 const Register swap_reg = rax; // Must use rax for cmpxchg instruction | |
1506 const Register obj_reg = rbx; // Will contain the oop | |
1507 const Register lock_reg = r13; // Address of compiler lock object (BasicLock) | |
1508 const Register old_hdr = r13; // value of old header at unlock time | |
1509 | |
1510 Label slow_path_lock; | |
1511 Label lock_done; | |
1512 | |
1513 if (method->is_synchronized()) { | |
1514 | |
1515 | |
1516 const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); | |
1517 | |
1518 // Get the handle (the 2nd argument) | |
1519 __ movq(oop_handle_reg, c_rarg1); | |
1520 | |
1521 // Get address of the box | |
1522 | |
1523 __ leaq(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); | |
1524 | |
1525 // Load the oop from the handle | |
1526 __ movq(obj_reg, Address(oop_handle_reg, 0)); | |
1527 | |
1528 if (UseBiasedLocking) { | |
1529 __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock); | |
1530 } | |
1531 | |
1532 // Load immediate 1 into swap_reg %rax | |
1533 __ movl(swap_reg, 1); | |
1534 | |
1535 // Load (object->mark() | 1) into swap_reg %rax | |
1536 __ orq(swap_reg, Address(obj_reg, 0)); | |
1537 | |
1538 // Save (object->mark() | 1) into BasicLock's displaced header | |
1539 __ movq(Address(lock_reg, mark_word_offset), swap_reg); | |
1540 | |
1541 if (os::is_MP()) { | |
1542 __ lock(); | |
1543 } | |
1544 | |
1545 // src -> dest iff dest == rax else rax <- dest | |
1546 __ cmpxchgq(lock_reg, Address(obj_reg, 0)); | |
1547 __ jcc(Assembler::equal, lock_done); | |
1548 | |
1549 // Hmm should this move to the slow path code area??? | |
1550 | |
1551 // Test if the oopMark is an obvious stack pointer, i.e., | |
1552 // 1) (mark & 3) == 0, and | |
1553 // 2) rsp <= mark < mark + os::pagesize() | |
1554 // These 3 tests can be done by evaluating the following | |
1555 // expression: ((mark - rsp) & (3 - os::vm_page_size())), | |
1556 // assuming both stack pointer and pagesize have their | |
1557 // least significant 2 bits clear. | |
1558 // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg | |
1559 | |
1560 __ subq(swap_reg, rsp); | |
1561 __ andq(swap_reg, 3 - os::vm_page_size()); | |
1562 | |
1563 // Save the test result, for recursive case, the result is zero | |
1564 __ movq(Address(lock_reg, mark_word_offset), swap_reg); | |
1565 __ jcc(Assembler::notEqual, slow_path_lock); | |
1566 | |
1567 // Slow path will re-enter here | |
1568 | |
1569 __ bind(lock_done); | |
1570 } | |
1571 | |
1572 | |
1573 // Finally just about ready to make the JNI call | |
1574 | |
1575 | |
1576 // get JNIEnv* which is first argument to native | |
1577 | |
1578 __ leaq(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset()))); | |
1579 | |
1580 // Now set thread in native | |
1581 __ mov64(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native); | |
1582 | |
1583 __ call(RuntimeAddress(method->native_function())); | |
1584 | |
1585 // Either restore the MXCSR register after returning from the JNI Call | |
1586 // or verify that it wasn't changed. | |
1587 if (RestoreMXCSROnJNICalls) { | |
1588 __ ldmxcsr(ExternalAddress(StubRoutines::amd64::mxcsr_std())); | |
1589 | |
1590 } | |
1591 else if (CheckJNICalls ) { | |
1592 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::amd64::verify_mxcsr_entry()))); | |
1593 } | |
1594 | |
1595 | |
1596 // Unpack native results. | |
1597 switch (ret_type) { | |
1598 case T_BOOLEAN: __ c2bool(rax); break; | |
1599 case T_CHAR : __ movzwl(rax, rax); break; | |
1600 case T_BYTE : __ sign_extend_byte (rax); break; | |
1601 case T_SHORT : __ sign_extend_short(rax); break; | |
1602 case T_INT : /* nothing to do */ break; | |
1603 case T_DOUBLE : | |
1604 case T_FLOAT : | |
1605 // Result is in xmm0 we'll save as needed | |
1606 break; | |
1607 case T_ARRAY: // Really a handle | |
1608 case T_OBJECT: // Really a handle | |
1609 break; // can't de-handlize until after safepoint check | |
1610 case T_VOID: break; | |
1611 case T_LONG: break; | |
1612 default : ShouldNotReachHere(); | |
1613 } | |
1614 | |
1615 // Switch thread to "native transition" state before reading the synchronization state. | |
1616 // This additional state is necessary because reading and testing the synchronization | |
1617 // state is not atomic w.r.t. GC, as this scenario demonstrates: | |
1618 // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. | |
1619 // VM thread changes sync state to synchronizing and suspends threads for GC. | |
1620 // Thread A is resumed to finish this native method, but doesn't block here since it | |
1621 // didn't see any synchronization is progress, and escapes. | |
1622 __ mov64(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans); | |
1623 | |
1624 if(os::is_MP()) { | |
1625 if (UseMembar) { | |
1626 // Force this write out before the read below | |
1627 __ membar(Assembler::Membar_mask_bits( | |
1628 Assembler::LoadLoad | Assembler::LoadStore | | |
1629 Assembler::StoreLoad | Assembler::StoreStore)); | |
1630 } else { | |
1631 // Write serialization page so VM thread can do a pseudo remote membar. | |
1632 // We use the current thread pointer to calculate a thread specific | |
1633 // offset to write to within the page. This minimizes bus traffic | |
1634 // due to cache line collision. | |
1635 __ serialize_memory(r15_thread, rcx); | |
1636 } | |
1637 } | |
1638 | |
1639 | |
1640 // check for safepoint operation in progress and/or pending suspend requests | |
1641 { | |
1642 Label Continue; | |
1643 | |
1644 __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()), | |
1645 SafepointSynchronize::_not_synchronized); | |
1646 | |
1647 Label L; | |
1648 __ jcc(Assembler::notEqual, L); | |
1649 __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); | |
1650 __ jcc(Assembler::equal, Continue); | |
1651 __ bind(L); | |
1652 | |
1653 // Don't use call_VM as it will see a possible pending exception and forward it | |
1654 // and never return here preventing us from clearing _last_native_pc down below. | |
1655 // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are | |
1656 // preserved and correspond to the bcp/locals pointers. So we do a runtime call | |
1657 // by hand. | |
1658 // | |
1659 save_native_result(masm, ret_type, stack_slots); | |
1660 __ movq(c_rarg0, r15_thread); | |
1661 __ movq(r12, rsp); // remember sp | |
1662 __ subq(rsp, frame::arg_reg_save_area_bytes); // windows | |
1663 __ andq(rsp, -16); // align stack as required by ABI | |
1664 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); | |
1665 __ movq(rsp, r12); // restore sp | |
1666 // Restore any method result value | |
1667 restore_native_result(masm, ret_type, stack_slots); | |
1668 __ bind(Continue); | |
1669 } | |
1670 | |
1671 // change thread state | |
1672 __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); | |
1673 | |
1674 Label reguard; | |
1675 Label reguard_done; | |
1676 __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled); | |
1677 __ jcc(Assembler::equal, reguard); | |
1678 __ bind(reguard_done); | |
1679 | |
1680 // native result if any is live | |
1681 | |
1682 // Unlock | |
1683 Label unlock_done; | |
1684 Label slow_path_unlock; | |
1685 if (method->is_synchronized()) { | |
1686 | |
1687 // Get locked oop from the handle we passed to jni | |
1688 __ movq(obj_reg, Address(oop_handle_reg, 0)); | |
1689 | |
1690 Label done; | |
1691 | |
1692 if (UseBiasedLocking) { | |
1693 __ biased_locking_exit(obj_reg, old_hdr, done); | |
1694 } | |
1695 | |
1696 // Simple recursive lock? | |
1697 | |
1698 __ cmpq(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int)NULL_WORD); | |
1699 __ jcc(Assembler::equal, done); | |
1700 | |
1701 // Must save rax if if it is live now because cmpxchg must use it | |
1702 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { | |
1703 save_native_result(masm, ret_type, stack_slots); | |
1704 } | |
1705 | |
1706 | |
1707 // get address of the stack lock | |
1708 __ leaq(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); | |
1709 // get old displaced header | |
1710 __ movq(old_hdr, Address(rax, 0)); | |
1711 | |
1712 // Atomic swap old header if oop still contains the stack lock | |
1713 if (os::is_MP()) { | |
1714 __ lock(); | |
1715 } | |
1716 __ cmpxchgq(old_hdr, Address(obj_reg, 0)); | |
1717 __ jcc(Assembler::notEqual, slow_path_unlock); | |
1718 | |
1719 // slow path re-enters here | |
1720 __ bind(unlock_done); | |
1721 if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { | |
1722 restore_native_result(masm, ret_type, stack_slots); | |
1723 } | |
1724 | |
1725 __ bind(done); | |
1726 | |
1727 } | |
1728 | |
1729 { | |
1730 SkipIfEqual skip(masm, &DTraceMethodProbes, false); | |
1731 save_native_result(masm, ret_type, stack_slots); | |
1732 __ movoop(c_rarg1, JNIHandles::make_local(method())); | |
1733 __ call_VM_leaf( | |
1734 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), | |
1735 r15_thread, c_rarg1); | |
1736 restore_native_result(masm, ret_type, stack_slots); | |
1737 } | |
1738 | |
1739 __ reset_last_Java_frame(false, true); | |
1740 | |
1741 // Unpack oop result | |
1742 if (ret_type == T_OBJECT || ret_type == T_ARRAY) { | |
1743 Label L; | |
1744 __ testq(rax, rax); | |
1745 __ jcc(Assembler::zero, L); | |
1746 __ movq(rax, Address(rax, 0)); | |
1747 __ bind(L); | |
1748 __ verify_oop(rax); | |
1749 } | |
1750 | |
1751 // reset handle block | |
1752 __ movq(rcx, Address(r15_thread, JavaThread::active_handles_offset())); | |
1753 __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int)NULL_WORD); | |
1754 | |
1755 // pop our frame | |
1756 | |
1757 __ leave(); | |
1758 | |
1759 // Any exception pending? | |
1760 __ cmpq(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); | |
1761 __ jcc(Assembler::notEqual, exception_pending); | |
1762 | |
1763 // Return | |
1764 | |
1765 __ ret(0); | |
1766 | |
1767 // Unexpected paths are out of line and go here | |
1768 | |
1769 // forward the exception | |
1770 __ bind(exception_pending); | |
1771 | |
1772 // and forward the exception | |
1773 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); | |
1774 | |
1775 | |
1776 // Slow path locking & unlocking | |
1777 if (method->is_synchronized()) { | |
1778 | |
1779 // BEGIN Slow path lock | |
1780 __ bind(slow_path_lock); | |
1781 | |
1782 // has last_Java_frame setup. No exceptions so do vanilla call not call_VM | |
1783 // args are (oop obj, BasicLock* lock, JavaThread* thread) | |
1784 | |
1785 // protect the args we've loaded | |
1786 save_args(masm, total_c_args, c_arg, out_regs); | |
1787 | |
1788 __ movq(c_rarg0, obj_reg); | |
1789 __ movq(c_rarg1, lock_reg); | |
1790 __ movq(c_rarg2, r15_thread); | |
1791 | |
1792 // Not a leaf but we have last_Java_frame setup as we want | |
1793 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); | |
1794 restore_args(masm, total_c_args, c_arg, out_regs); | |
1795 | |
1796 #ifdef ASSERT | |
1797 { Label L; | |
1798 __ cmpq(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); | |
1799 __ jcc(Assembler::equal, L); | |
1800 __ stop("no pending exception allowed on exit from monitorenter"); | |
1801 __ bind(L); | |
1802 } | |
1803 #endif | |
1804 __ jmp(lock_done); | |
1805 | |
1806 // END Slow path lock | |
1807 | |
1808 // BEGIN Slow path unlock | |
1809 __ bind(slow_path_unlock); | |
1810 | |
1811 // If we haven't already saved the native result we must save it now as xmm registers | |
1812 // are still exposed. | |
1813 | |
1814 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { | |
1815 save_native_result(masm, ret_type, stack_slots); | |
1816 } | |
1817 | |
1818 __ leaq(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); | |
1819 | |
1820 __ movq(c_rarg0, obj_reg); | |
1821 __ movq(r12, rsp); // remember sp | |
1822 __ subq(rsp, frame::arg_reg_save_area_bytes); // windows | |
1823 __ andq(rsp, -16); // align stack as required by ABI | |
1824 | |
1825 // Save pending exception around call to VM (which contains an EXCEPTION_MARK) | |
1826 // NOTE that obj_reg == rbx currently | |
1827 __ movq(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); | |
1828 __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); | |
1829 | |
1830 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); | |
1831 __ movq(rsp, r12); // restore sp | |
1832 #ifdef ASSERT | |
1833 { | |
1834 Label L; | |
1835 __ cmpq(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD); | |
1836 __ jcc(Assembler::equal, L); | |
1837 __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); | |
1838 __ bind(L); | |
1839 } | |
1840 #endif /* ASSERT */ | |
1841 | |
1842 __ movq(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx); | |
1843 | |
1844 if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { | |
1845 restore_native_result(masm, ret_type, stack_slots); | |
1846 } | |
1847 __ jmp(unlock_done); | |
1848 | |
1849 // END Slow path unlock | |
1850 | |
1851 } // synchronized | |
1852 | |
1853 // SLOW PATH Reguard the stack if needed | |
1854 | |
1855 __ bind(reguard); | |
1856 save_native_result(masm, ret_type, stack_slots); | |
1857 __ movq(r12, rsp); // remember sp | |
1858 __ subq(rsp, frame::arg_reg_save_area_bytes); // windows | |
1859 __ andq(rsp, -16); // align stack as required by ABI | |
1860 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); | |
1861 __ movq(rsp, r12); // restore sp | |
1862 restore_native_result(masm, ret_type, stack_slots); | |
1863 // and continue | |
1864 __ jmp(reguard_done); | |
1865 | |
1866 | |
1867 | |
1868 __ flush(); | |
1869 | |
1870 nmethod *nm = nmethod::new_native_nmethod(method, | |
1871 masm->code(), | |
1872 vep_offset, | |
1873 frame_complete, | |
1874 stack_slots / VMRegImpl::slots_per_word, | |
1875 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), | |
1876 in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), | |
1877 oop_maps); | |
1878 return nm; | |
1879 | |
1880 } | |
1881 | |
1882 // this function returns the adjust size (in number of words) to a c2i adapter | |
1883 // activation for use during deoptimization | |
1884 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { | |
1885 return (callee_locals - callee_parameters) * Interpreter::stackElementWords(); | |
1886 } | |
1887 | |
1888 | |
1889 uint SharedRuntime::out_preserve_stack_slots() { | |
1890 return 0; | |
1891 } | |
1892 | |
1893 | |
1894 //------------------------------generate_deopt_blob---------------------------- | |
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("deopt_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);
  int frame_size_in_words;  // filled in by save_live_registers() below
  OopMap* map = NULL;
  OopMapSet *oop_maps = new OopMapSet();

  // -------------
  // This code enters when returning to a de-optimized nmethod. A return
  // address has been pushed on the stack, and return values are in
  // registers.
  // If we are doing a normal deopt then we were called from the patched
  // nmethod from the point we returned to the nmethod. So the return
  // address on the stack is wrong by NativeCall::instruction_size
  // We will adjust the value so it looks like we have the original return
  // address on the stack (like when we eagerly deoptimized).
  // In the case of an exception pending when deoptimizing, we enter
  // with a return address on the stack that points after the call we patched
  // into the exception handler. We have the following register state from,
  // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
  // rax: exception oop
  // rbx: exception handler
  // rdx: throwing pc
  // So in this case we simply jam rdx into the useless return address and
  // the stack looks just like we want.
  //
  // At this point we need to de-opt. We save the argument return
  // registers. We call the first C routine, fetch_unroll_info(). This
  // routine captures the return values and returns a structure which
  // describes the current frame size and the sizes of all replacement frames.
  // The current frame is compiled code and may contain many inlined
  // functions, each with their own JVM state. We pop the current frame, then
  // push all the new frames. Then we call the C routine unpack_frames() to
  // populate these frames. Finally unpack_frames() returns us the new target
  // address. Notice that callee-save registers are BLOWN here; they have
  // already been captured in the vframeArray at the time the return PC was
  // patched.
  address start = __ pc();
  Label cont;

  // Prolog for non exception case!

  // Save everything in sight.
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);

  // Normal deoptimization. Save exec mode for unpack_frames.
  // r12 is callee-saved in the C ABI, so the mode survives the two runtime
  // calls below until it is passed to unpack_frames as exec_mode.
  __ movl(r12, Deoptimization::Unpack_deopt); // callee-saved
  __ jmp(cont);

  // Offset of the exception-case entry point within the blob; handed to
  // DeoptimizationBlob::create at the bottom.
  int exception_offset = __ pc() - start;

  // Prolog for exception case

  // Push throwing pc as return address (rdx holds it on this entry path,
  // per the register state documented above).
  __ pushq(rdx);

  // Save everything in sight.
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ movl(r12, Deoptimization::Unpack_exception); // callee-saved

  __ bind(cont);

  // Call C code. Need thread and this frame, but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen.
  //
  // UnrollBlock* fetch_unroll_info(JavaThread* thread)

  // fetch_unroll_info needs to call last_java_frame().

  __ set_last_Java_frame(noreg, noreg, NULL);
#ifdef ASSERT
  // Verify last_Java_fp was left cleared by set_last_Java_frame(noreg, ...).
  { Label L;
    __ cmpq(Address(r15_thread,
                    JavaThread::last_Java_fp_offset()),
            0);
    __ jcc(Assembler::equal, L);
    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
    __ bind(L);
  }
#endif // ASSERT
  __ movq(c_rarg0, r15_thread);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));

  // Need to have an oopmap that tells fetch_unroll_info where to
  // find any register it might need.
  oop_maps->add_gc_map(__ pc() - start, map);

  __ reset_last_Java_frame(false, false);

  // Load UnrollBlock* into rdi
  __ movq(rdi, rax);

  // Only register save data is on the stack.
  // Now restore the result registers. Everything else is either dead
  // or captured in the vframeArray.
  RegisterSaver::restore_result_registers(masm);

  // All of the register save area has been popped of the stack. Only the
  // return address remains.

  // Pop all the frames we must move/replace.
  //
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame (no frame link)
  // 3: caller of deopting frame (could be compiled/interpreted).
  //
  // Note: by leaving the return address of self-frame on the stack
  // and using the size of frame 2 to adjust the stack
  // when we are done the return to frame 3 will still be on the stack.

  // Pop deoptimized frame (size is a 32-bit int; movl zero-extends into rcx)
  __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
  __ addq(rsp, rcx);

  // rsp should be pointing at the return address to the caller (3)

  // Stack bang to make sure there's enough room for these interpreter frames.
  if (UseStackBanging) {
    __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
    __ bang_stack_size(rbx, rcx);
  }

  // Load address of array of frame pcs into rcx
  __ movq(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));

  // Trash the old pc
  __ addq(rsp, wordSize);

  // Load address of array of frame sizes into rsi
  __ movq(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));

  // Load counter into rdx
  __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));

  // Pick up the initial fp we should save
  __ movq(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));

  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame and the stack walking of interpreter_sender will get the unextended sp
  // value and not the "real" sp value.

  const Register sender_sp = r8;

  __ movq(sender_sp, rsp);
  __ movl(rbx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       caller_adjustment_offset_in_bytes()));
  __ subq(rsp, rbx);

  // Push interpreter frames in a loop: one skeletal frame per entry in the
  // frame_sizes/frame_pcs arrays, linked walkably via sender_sp.
  Label loop;
  __ bind(loop);
  __ movq(rbx, Address(rsi, 0)); // Load frame size
  __ subq(rbx, 2*wordSize);      // We'll push pc and ebp by hand
  __ pushq(Address(rcx, 0));     // Save return address
  __ enter();                    // Save old & set new ebp
  __ subq(rsp, rbx);             // Prolog
  __ movq(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
          sender_sp);            // Make it walkable
  // This value is corrected by layout_activation_impl
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int)NULL_WORD );
  __ movq(sender_sp, rsp);       // Pass sender_sp to next frame
  __ addq(rsi, wordSize);        // Bump array pointer (sizes)
  __ addq(rcx, wordSize);        // Bump array pointer (pcs)
  __ decrementl(rdx);            // Decrement counter
  __ jcc(Assembler::notZero, loop);
  __ pushq(Address(rcx, 0));     // Save final return address

  // Re-push self-frame
  __ enter();                    // Save old & set new ebp

  // Allocate a full sized register save area.
  // Return address and rbp are in place, so we allocate two less words.
  __ subq(rsp, (frame_size_in_words - 2) * wordSize);

  // Restore frame locals after moving the frame: only the possible return
  // values (xmm0 / rax) need to live in the new save area.
  __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
  __ movq(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);

  // Call C code. Need thread but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen. Call should
  // restore return values to their stack-slots with the new SP.
  //
  // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)

  // Use rbp because the frames look interpreted now
  __ set_last_Java_frame(noreg, rbp, NULL);

  __ movq(c_rarg0, r15_thread);
  __ movl(c_rarg1, r12); // second arg: exec_mode (saved in r12 at entry)
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));

  // Set an oopmap for the call site
  oop_maps->add_gc_map(__ pc() - start,
                       new OopMap( frame_size_in_words, 0 ));

  __ reset_last_Java_frame(true, false);

  // Collect return values
  __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
  __ movq(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));

  // Pop self-frame.
  __ leave(); // Epilog

  // Jump to interpreter
  __ ret(0);

  // Make sure all code is generated
  masm->flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, 0, frame_size_in_words);
}
2115 | |
2116 #ifdef COMPILER2 | |
2117 //------------------------------generate_uncommon_trap_blob-------------------- | |
void SharedRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  // framesize is counted in 32-bit slots; %4 == 0 keeps rsp 16-byte aligned.
  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");

  address start = __ pc();

  // Push self-frame. We get here with a return address on the
  // stack, so rsp is 8-byte aligned until we allocate our frame.
  // NOTE(review): "Epilog!" looks copy/pasted from the matching addq below;
  // this subq is the frame-allocating prolog.
  __ subq(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog!

  // No callee saved registers. rbp is assumed implicitly saved
  __ movq(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);

  // compiler left unloaded_class_index in j_rarg0 move to where the
  // runtime expects it.
  __ movl(c_rarg1, j_rarg0);

  __ set_last_Java_frame(noreg, noreg, NULL);

  // Call C code. Need thread but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen. Call should
  // capture callee-saved registers as well as return values.
  // Thread is in rdi already.
  //
  // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);

  __ movq(c_rarg0, r15_thread);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));

  // Set an oopmap for the call site
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);

  // location of rbp is known implicitly by the frame sender code

  oop_maps->add_gc_map(__ pc() - start, map);

  __ reset_last_Java_frame(false, false);

  // Load UnrollBlock* into rdi (returned in rax by uncommon_trap)
  __ movq(rdi, rax);

  // Pop all the frames we must move/replace.
  //
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame (no frame link)
  // 3: caller of deopting frame (could be compiled/interpreted).

  // Pop self-frame. We have no frame, and must rely only on rax and rsp.
  __ addq(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog!

  // Pop deoptimized frame (int)
  __ movl(rcx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       size_of_deoptimized_frame_offset_in_bytes()));
  __ addq(rsp, rcx);

  // rsp should be pointing at the return address to the caller (3)

  // Stack bang to make sure there's enough room for these interpreter frames.
  if (UseStackBanging) {
    __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
    __ bang_stack_size(rbx, rcx);
  }

  // Load address of array of frame pcs into rcx (address*)
  __ movq(rcx,
          Address(rdi,
                  Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));

  // Trash the return pc
  __ addq(rsp, wordSize);

  // Load address of array of frame sizes into rsi (intptr_t*)
  __ movq(rsi, Address(rdi,
                       Deoptimization::UnrollBlock::
                       frame_sizes_offset_in_bytes()));

  // Counter of frames to push
  __ movl(rdx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       number_of_frames_offset_in_bytes())); // (int)

  // Pick up the initial fp we should save
  __ movq(rbp,
          Address(rdi,
                  Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));

  // Now adjust the caller's stack to make up for the extra locals but
  // record the original sp so that we can save it in the skeletal
  // interpreter frame and the stack walking of interpreter_sender
  // will get the unextended sp value and not the "real" sp value.

  const Register sender_sp = r8;

  __ movq(sender_sp, rsp);
  __ movl(rbx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       caller_adjustment_offset_in_bytes())); // (int)
  __ subq(rsp, rbx);

  // Push interpreter frames in a loop: one skeletal frame per entry in the
  // frame_sizes/frame_pcs arrays, linked walkably via sender_sp.
  Label loop;
  __ bind(loop);
  __ movq(rbx, Address(rsi, 0)); // Load frame size
  __ subq(rbx, 2 * wordSize);    // We'll push pc and rbp by hand
  __ pushq(Address(rcx, 0));     // Save return address
  __ enter();                    // Save old & set new rbp
  __ subq(rsp, rbx);             // Prolog
  __ movq(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
          sender_sp);            // Make it walkable
  // This value is corrected by layout_activation_impl
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int)NULL_WORD );
  __ movq(sender_sp, rsp);       // Pass sender_sp to next frame
  __ addq(rsi, wordSize);        // Bump array pointer (sizes)
  __ addq(rcx, wordSize);        // Bump array pointer (pcs)
  __ decrementl(rdx);            // Decrement counter
  __ jcc(Assembler::notZero, loop);
  __ pushq(Address(rcx, 0));     // Save final return address

  // Re-push self-frame
  __ enter();                    // Save old & set new rbp
  __ subq(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt);
                                 // Prolog

  // Use rbp because the frames look interpreted now
  __ set_last_Java_frame(noreg, rbp, NULL);

  // Call C code. Need thread but NOT official VM entry
  // crud. We cannot block on this call, no GC can happen. Call should
  // restore return values to their stack-slots with the new SP.
  // Thread is in rdi already.
  //
  // BasicType unpack_frames(JavaThread* thread, int exec_mode);

  __ movq(c_rarg0, r15_thread);
  __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));

  // Set an oopmap for the call site
  oop_maps->add_gc_map(__ pc() - start, new OopMap(SimpleRuntimeFrame::framesize, 0));

  __ reset_last_Java_frame(true, false);

  // Pop self-frame.
  __ leave(); // Epilog

  // Jump to interpreter
  __ ret(0);

  // Make sure all code is generated
  masm->flush();

  // framesize is in 32-bit slots; >> 1 converts to 64-bit words for the blob.
  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
                                                 SimpleRuntimeFrame::framesize >> 1);
}
2280 #endif // COMPILER2 | |
2281 | |
2282 | |
//------------------------------generate_handler_blob------
//
// Generate a special Compile2Runtime blob that saves all registers,
// and setup oopmap.
//
// call_ptr     - runtime entry point to call; it receives the current
//                JavaThread* as its only argument.
// cause_return - true when the top frame's return address is already valid
//                on the stack (poll at a return); false when a placeholder
//                must be pushed and later overwritten with the pc saved in
//                JavaThread::saved_exception_pc.
//
// Returns the SafepointBlob wrapping the generated code and its oopmap.
//
static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
  // On a pending exception we jump to the forward_exception stub below,
  // so it must have been generated already.
  assert(StubRoutines::forward_exception_entry() != NULL,
         "must be generated before");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code.  Setup code generation tools.
  CodeBuffer buffer("handler_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  address start   = __ pc();
  address call_pc = NULL;  // NOTE(review): unused in this generator
  int frame_size_in_words;

  // Make room for return address (or push it again)
  if (!cause_return) {
    // Dummy word; overwritten below with the saved exception pc once the
    // register-save frame (and rbp) are set up.
    __ pushq(rbx);
  }

  // Save registers, fpu state, and flags
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);

  // The following is basically a call_VM.  However, we need the precise
  // address of the call in order to generate an oopmap.  Hence, we do all
  // the work ourselves.

  __ set_last_Java_frame(noreg, noreg, NULL);

  // The return address must always be correct so that the frame constructor
  // never sees an invalid pc.

  if (!cause_return) {
    // Overwrite the dummy value we pushed on entry with the real pc at
    // which the thread was stopped (saved by the signal/polling machinery).
    __ movq(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
    __ movq(Address(rbp, wordSize), c_rarg0);
  }

  // Do the call
  __ movq(c_rarg0, r15_thread);
  __ call(RuntimeAddress(call_ptr));

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  oop_maps->add_gc_map( __ pc() - start, map);

  Label noException;

  __ reset_last_Java_frame(false, false);

  // Did the runtime call leave a pending exception in the thread?
  __ cmpq(Address(r15_thread, Thread::pending_exception_offset()), (int)NULL_WORD);
  __ jcc(Assembler::equal, noException);

  // Exception pending: restore the caller's registers and let the
  // forward_exception stub dispatch it.

  RegisterSaver::restore_live_registers(masm);

  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  // No exception case
  __ bind(noException);

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm);

  __ ret(0);

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
}
2365 | |
//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into vm to find out the proper destination
// of a java call.  All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
// destination - runtime resolution entry (takes the current JavaThread*);
//               the resolved code entry comes back in rax and the resolved
//               methodOop in the thread's vm_result slot.
// name        - name for the generated RuntimeStub / code buffer.
//
static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
  // On a pending exception we jump to the forward_exception stub below,
  // so it must have been generated already.
  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;

  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_in_words;

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);

  int frame_complete = __ offset();

  __ set_last_Java_frame(noreg, noreg, NULL);

  __ movq(c_rarg0, r15_thread);

  __ call(RuntimeAddress(destination));


  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map( __ offset() - start, map);

  // rax contains the address we are going to jump to assuming no exception got installed

  // clear last_Java_sp
  __ reset_last_Java_frame(false, false);
  // check for pending exceptions
  Label pending;
  __ cmpq(Address(r15_thread, Thread::pending_exception_offset()), (int)NULL_WORD);
  __ jcc(Assembler::notEqual, pending);

  // get the returned methodOop
  __ movq(rbx, Address(r15_thread, JavaThread::vm_result_offset()));
  // Patch the saved-register area so restore_live_registers reloads rbx
  // with the methodOop ...
  __ movq(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);

  // ... and rax with the resolved destination.
  __ movq(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go.

  __ jmp(rax);

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // exception pending => remove activation and forward to exception handler

  // Clear vm_result so GC does not see a stale methodOop.
  __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);

  __ movq(rax, Address(r15_thread, Thread::pending_exception_offset()));
  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // frame_size_words or bytes??
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
}
2449 | |
2450 | |
2451 void SharedRuntime::generate_stubs() { | |
2452 | |
2453 _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method), | |
2454 "wrong_method_stub"); | |
2455 _ic_miss_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss), | |
2456 "ic_miss_stub"); | |
2457 _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C), | |
2458 "resolve_opt_virtual_call"); | |
2459 | |
2460 _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C), | |
2461 "resolve_virtual_call"); | |
2462 | |
2463 _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C), | |
2464 "resolve_static_call"); | |
2465 _polling_page_safepoint_handler_blob = | |
2466 generate_handler_blob(CAST_FROM_FN_PTR(address, | |
2467 SafepointSynchronize::handle_polling_page_exception), false); | |
2468 | |
2469 _polling_page_return_handler_blob = | |
2470 generate_handler_blob(CAST_FROM_FN_PTR(address, | |
2471 SafepointSynchronize::handle_polling_page_exception), true); | |
2472 | |
2473 generate_deopt_blob(); | |
2474 | |
2475 #ifdef COMPILER2 | |
2476 generate_uncommon_trap_blob(); | |
2477 #endif // COMPILER2 | |
2478 } | |
2479 | |
2480 | |
2481 #ifdef COMPILER2 | |
// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
//
//------------------------------generate_exception_blob---------------------------
// creates exception blob at the end
// Using exception blob, this code is jumped from a compiled method.
// (see emit_exception_handler in x86_64.ad file)
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee save registers) unwind the frame and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
//   rax: exception oop
//   rdx: exception pc
//
// Results:
//   rax: exception oop
//   rdx: exception pc in caller or ???
//   destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
//       Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
//

void OptoRuntime::generate_exception_blob() {
  // The register usage below (rax/rdx in, r8 as scratch for the handler)
  // relies on these registers being caller-saved in compiled code.
  assert(!OptoRuntime::is_callee_saved_register(RDX_num), "");
  assert(!OptoRuntime::is_callee_saved_register(RAX_num), "");
  assert(!OptoRuntime::is_callee_saved_register(RCX_num), "");

  // framesize is in 32-bit slots; a multiple of 4 keeps rsp 16-byte aligned.
  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");

  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("exception_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);


  address start = __ pc();

  // Exception pc is 'return address' for stack walker
  __ pushq(rdx);
  __ subq(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog

  // Save callee-saved registers.  See x86_64.ad.

  // rbp is an implicitly saved callee saved register (i.e. the calling
  // convention will save restore it in prolog/epilog) Other than that
  // there are no callee save registers now that adapter frames are gone.

  __ movq(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);

  // Store exception in Thread object. We cannot pass any arguments to the
  // handle_exception call, since we do not want to make any assumption
  // about the size of the frame where the exception happened in.
  // c_rarg0 is either rdi (Linux) or rcx (Windows).
  __ movq(Address(r15_thread, JavaThread::exception_oop_offset()),rax);
  __ movq(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);

  // This call does all the hard work.  It checks if an exception handler
  // exists in the method.
  // If so, it returns the handler address.
  // If not, it prepares for stack-unwinding, restoring the callee-save
  // registers of the frame being removed.
  //
  // address OptoRuntime::handle_exception_C(JavaThread* thread)

  __ set_last_Java_frame(noreg, noreg, NULL);
  __ movq(c_rarg0, r15_thread);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));

  // Set an oopmap for the call site.  This oopmap will only be used if we
  // are unwinding the stack.  Hence, all locations will be dead.
  // Callee-saved registers will be the same as the frame above (i.e.,
  // handle_exception_stub), since they were restored when we got the
  // exception.

  OopMapSet* oop_maps = new OopMapSet();

  oop_maps->add_gc_map( __ pc()-start, new OopMap(SimpleRuntimeFrame::framesize, 0));

  __ reset_last_Java_frame(false, false);

  // Restore callee-saved registers

  // rbp is an implicitly saved callee saved register (i.e. the calling
  // convention will save restore it in prolog/epilog) Other than that
  // there are no callee save registers now that adapter frames are gone.

  __ movq(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));

  __ addq(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
  __ popq(rdx);                  // No need for exception pc anymore

  // rax: exception handler

  // We have a handler in rax (could be deopt blob).
  // Move it out of the way before rax is reloaded with the exception oop.
  __ movq(r8, rax);

  // Get the exception oop
  __ movq(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
  // Get the exception pc in case we are deoptimized
  __ movq(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
#ifdef ASSERT
  // Debug builds: clear the handler-pc/pc fields so stale values are caught.
  __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
#endif
  // Clear the exception oop so GC no longer processes it as a root.
  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);

  // rax: exception oop
  // r8:  exception handler
  // rdx: exception pc
  // Jump to handler

  __ jmp(r8);

  // Make sure all code is generated
  masm->flush();

  // Set exception blob
  _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
}
2609 #endif // COMPILER2 |