Mercurial > hg > truffle
annotate src/cpu/x86/vm/stubGenerator_x86_32.cpp @ 20504:6948da6d7c13
8052172: Evacuation failure handling in G1 does not evacuate all objects if -XX:-G1DeferredRSUpdate is set
Summary: Remove -XX:-G1DeferredRSUpdate functionality as it is racy. During evacuation failure handling, threads where evacuation failure handling occurred may try to add remembered sets to regions whose remembered sets are currently being scanned. The iterator to handle the remembered set scan does not support addition of entries during scan and so may skip valid references.
Reviewed-by: iveresov, brutisso, mgerdin
author | tschatzl |
---|---|
date | Tue, 30 Sep 2014 09:44:36 +0200 |
parents | 04d32e7fad07 |
children | d8041d695d19 |
rev | line source |
---|---|
0 | 1 /* |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1506
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1506
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
1506
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
7199
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
6894
diff
changeset
|
26 #include "asm/macroAssembler.hpp" |
cd3d6a6b95d9
8003240: x86: move MacroAssembler into separate file
twisti
parents:
6894
diff
changeset
|
27 #include "asm/macroAssembler.inline.hpp" |
1972 | 28 #include "interpreter/interpreter.hpp" |
29 #include "nativeInst_x86.hpp" | |
30 #include "oops/instanceOop.hpp" | |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6266
diff
changeset
|
31 #include "oops/method.hpp" |
1972 | 32 #include "oops/objArrayKlass.hpp" |
33 #include "oops/oop.inline.hpp" | |
34 #include "prims/methodHandles.hpp" | |
35 #include "runtime/frame.inline.hpp" | |
36 #include "runtime/handles.inline.hpp" | |
37 #include "runtime/sharedRuntime.hpp" | |
38 #include "runtime/stubCodeGenerator.hpp" | |
39 #include "runtime/stubRoutines.hpp" | |
7180
f34d701e952e
8003935: Simplify the needed includes for using Thread::current()
stefank
parents:
6894
diff
changeset
|
40 #include "runtime/thread.inline.hpp" |
1972 | 41 #include "utilities/top.hpp" |
42 #ifdef COMPILER2 | |
43 #include "opto/runtime.hpp" | |
44 #endif | |
0 | 45 |
46 // Declaration and definition of StubGenerator (no .hpp file). | |
47 // For a more detailed description of the stub routine structure | |
48 // see the comment in stubRoutines.hpp | |
49 | |
// Emission shorthand: "__ foo(...)" expands to "_masm->foo(...)".
#define __ _masm->
// Same shorthand, but with _masm cast to Assembler* first.
#define a__ ((Assembler*)_masm)->

// BLOCK_COMMENT(str) forwards to block_comment() in non-PRODUCT builds and
// expands to nothing in PRODUCT builds.
#ifndef PRODUCT
#define BLOCK_COMMENT(str) __ block_comment(str)
#else
#define BLOCK_COMMENT(str) /* nothing */
#endif

// Used as "__ BIND(label);": binds the label and then emits "<label>:" as a
// block comment. The label identifier is stringified, and the macro expands
// to two statements.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Mask applied to MXCSR before comparing it with the standard value; it
// masks out any pending exceptions.
const int MXCSR_MASK         = 0xFFC0;
// Mask applied to the x87 FPU control word before comparison.
const int FPU_CNTRL_WRD_MASK = 0xFFFF;
63 | |
64 // ------------------------------------------------------------------------------------------------------------------------- | |
65 // Stub Code definitions | |
66 | |
67 static address handle_unsafe_access() { | |
68 JavaThread* thread = JavaThread::current(); | |
69 address pc = thread->saved_exception_pc(); | |
70 // pc is the instruction which we must emulate | |
71 // doing a no-op is fine: return garbage from the load | |
72 // therefore, compute npc | |
73 address npc = Assembler::locate_next_instruction(pc); | |
74 | |
75 // request an async exception | |
76 thread->set_pending_unsafe_access_error(); | |
77 | |
78 // return address of next instruction to execute | |
79 return npc; | |
80 } | |
81 | |
82 class StubGenerator: public StubCodeGenerator { | |
83 private: | |
84 | |
85 #ifdef PRODUCT | |
10973
ef57c43512d6
8014431: cleanup warnings indicated by the -Wunused-value compiler option on linux
ccheung
parents:
10324
diff
changeset
|
86 #define inc_counter_np(counter) ((void)0) |
0 | 87 #else |
88 void inc_counter_np_(int& counter) { | |
304 | 89 __ incrementl(ExternalAddress((address)&counter)); |
0 | 90 } |
91 #define inc_counter_np(counter) \ | |
92 BLOCK_COMMENT("inc_counter " #counter); \ | |
93 inc_counter_np_(counter); | |
94 #endif //PRODUCT | |
95 | |
96 void inc_copy_counter_np(BasicType t) { | |
97 #ifndef PRODUCT | |
98 switch (t) { | |
99 case T_BYTE: inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return; | |
100 case T_SHORT: inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return; | |
101 case T_INT: inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return; | |
102 case T_LONG: inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return; | |
103 case T_OBJECT: inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return; | |
104 } | |
105 ShouldNotReachHere(); | |
106 #endif //PRODUCT | |
107 } | |
108 | |
109 //------------------------------------------------------------------------------------------------------------------------ | |
110 // Call stubs are used to call Java from C | |
111 // | |
112 // [ return_from_Java ] <--- rsp | |
113 // [ argument word n ] | |
114 // ... | |
115 // -N [ argument word 1 ] | |
116 // -7 [ Possible padding for stack alignment ] | |
117 // -6 [ Possible padding for stack alignment ] | |
118 // -5 [ Possible padding for stack alignment ] | |
119 // -4 [ mxcsr save ] <--- rsp_after_call | |
120 // -3 [ saved rbx, ] | |
121 // -2 [ saved rsi ] | |
122 // -1 [ saved rdi ] | |
123 // 0 [ saved rbp, ] <--- rbp, | |
124 // 1 [ return address ] | |
125 // 2 [ ptr. to call wrapper ] | |
126 // 3 [ result ] | |
127 // 4 [ result_type ] | |
128 // 5 [ method ] | |
129 // 6 [ entry_point ] | |
130 // 7 [ parameters ] | |
131 // 8 [ parameter_size ] | |
132 // 9 [ thread ] | |
133 | |
134 | |
135 address generate_call_stub(address& return_address) { | |
136 StubCodeMark mark(this, "StubRoutines", "call_stub"); | |
137 address start = __ pc(); | |
138 | |
139 // stub code parameters / addresses | |
140 assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code"); | |
141 bool sse_save = false; | |
142 const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()! | |
143 const int locals_count_in_bytes (4*wordSize); | |
144 const Address mxcsr_save (rbp, -4 * wordSize); | |
145 const Address saved_rbx (rbp, -3 * wordSize); | |
146 const Address saved_rsi (rbp, -2 * wordSize); | |
147 const Address saved_rdi (rbp, -1 * wordSize); | |
148 const Address result (rbp, 3 * wordSize); | |
149 const Address result_type (rbp, 4 * wordSize); | |
150 const Address method (rbp, 5 * wordSize); | |
151 const Address entry_point (rbp, 6 * wordSize); | |
152 const Address parameters (rbp, 7 * wordSize); | |
153 const Address parameter_size(rbp, 8 * wordSize); | |
154 const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()! | |
155 sse_save = UseSSE > 0; | |
156 | |
157 // stub code | |
158 __ enter(); | |
304 | 159 __ movptr(rcx, parameter_size); // parameter counter |
1506 | 160 __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes |
304 | 161 __ addptr(rcx, locals_count_in_bytes); // reserve space for register saves |
162 __ subptr(rsp, rcx); | |
163 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack | |
0 | 164 |
165 // save rdi, rsi, & rbx, according to C calling conventions | |
304 | 166 __ movptr(saved_rdi, rdi); |
167 __ movptr(saved_rsi, rsi); | |
168 __ movptr(saved_rbx, rbx); | |
0 | 169 // save and initialize %mxcsr |
170 if (sse_save) { | |
171 Label skip_ldmx; | |
172 __ stmxcsr(mxcsr_save); | |
173 __ movl(rax, mxcsr_save); | |
174 __ andl(rax, MXCSR_MASK); // Only check control and mask bits | |
175 ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std()); | |
176 __ cmp32(rax, mxcsr_std); | |
177 __ jcc(Assembler::equal, skip_ldmx); | |
178 __ ldmxcsr(mxcsr_std); | |
179 __ bind(skip_ldmx); | |
180 } | |
181 | |
182 // make sure the control word is correct. | |
183 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); | |
184 | |
185 #ifdef ASSERT | |
186 // make sure we have no pending exceptions | |
187 { Label L; | |
304 | 188 __ movptr(rcx, thread); |
189 __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD); | |
0 | 190 __ jcc(Assembler::equal, L); |
191 __ stop("StubRoutines::call_stub: entered with pending exception"); | |
192 __ bind(L); | |
193 } | |
194 #endif | |
195 | |
196 // pass parameters if any | |
197 BLOCK_COMMENT("pass parameters if any"); | |
198 Label parameters_done; | |
199 __ movl(rcx, parameter_size); // parameter counter | |
200 __ testl(rcx, rcx); | |
201 __ jcc(Assembler::zero, parameters_done); | |
202 | |
203 // parameter passing loop | |
204 | |
205 Label loop; | |
206 // Copy Java parameters in reverse order (receiver last) | |
207 // Note that the argument order is inverted in the process | |
208 // source is rdx[rcx: N-1..0] | |
209 // dest is rsp[rbx: 0..N-1] | |
210 | |
304 | 211 __ movptr(rdx, parameters); // parameter pointer |
212 __ xorptr(rbx, rbx); | |
0 | 213 |
214 __ BIND(loop); | |
215 | |
216 // get parameter | |
304 | 217 __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize)); |
218 __ movptr(Address(rsp, rbx, Interpreter::stackElementScale(), | |
0 | 219 Interpreter::expr_offset_in_bytes(0)), rax); // store parameter |
220 __ increment(rbx); | |
221 __ decrement(rcx); | |
222 __ jcc(Assembler::notZero, loop); | |
223 | |
224 // call Java function | |
225 __ BIND(parameters_done); | |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6266
diff
changeset
|
226 __ movptr(rbx, method); // get Method* |
304 | 227 __ movptr(rax, entry_point); // get entry_point |
228 __ mov(rsi, rsp); // set sender sp | |
0 | 229 BLOCK_COMMENT("call Java function"); |
230 __ call(rax); | |
231 | |
232 BLOCK_COMMENT("call_stub_return_address:"); | |
233 return_address = __ pc(); | |
234 | |
2245
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
235 #ifdef COMPILER2 |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
236 { |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
237 Label L_skip; |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
238 if (UseSSE >= 2) { |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
239 __ verify_FPU(0, "call_stub_return"); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
240 } else { |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
241 for (int i = 1; i < 8; i++) { |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
242 __ ffree(i); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
243 } |
0 | 244 |
2245
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
245 // UseSSE <= 1 so double result should be left on TOS |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
246 __ movl(rsi, result_type); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
247 __ cmpl(rsi, T_DOUBLE); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
248 __ jcc(Assembler::equal, L_skip); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
249 if (UseSSE == 0) { |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
250 // UseSSE == 0 so float result should be left on TOS |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
251 __ cmpl(rsi, T_FLOAT); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
252 __ jcc(Assembler::equal, L_skip); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
253 } |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
254 __ ffree(0); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
255 } |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
256 __ BIND(L_skip); |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
257 } |
638119ce7cfd
7009309: JSR 292: compiler/6991596/Test6991596.java crashes on fastdebug JDK7/b122
twisti
parents:
1972
diff
changeset
|
258 #endif // COMPILER2 |
0 | 259 |
260 // store result depending on type | |
261 // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) | |
304 | 262 __ movptr(rdi, result); |
0 | 263 Label is_long, is_float, is_double, exit; |
264 __ movl(rsi, result_type); | |
265 __ cmpl(rsi, T_LONG); | |
266 __ jcc(Assembler::equal, is_long); | |
267 __ cmpl(rsi, T_FLOAT); | |
268 __ jcc(Assembler::equal, is_float); | |
269 __ cmpl(rsi, T_DOUBLE); | |
270 __ jcc(Assembler::equal, is_double); | |
271 | |
272 // handle T_INT case | |
273 __ movl(Address(rdi, 0), rax); | |
274 __ BIND(exit); | |
275 | |
276 // check that FPU stack is empty | |
277 __ verify_FPU(0, "generate_call_stub"); | |
278 | |
279 // pop parameters | |
304 | 280 __ lea(rsp, rsp_after_call); |
0 | 281 |
282 // restore %mxcsr | |
283 if (sse_save) { | |
284 __ ldmxcsr(mxcsr_save); | |
285 } | |
286 | |
287 // restore rdi, rsi and rbx, | |
304 | 288 __ movptr(rbx, saved_rbx); |
289 __ movptr(rsi, saved_rsi); | |
290 __ movptr(rdi, saved_rdi); | |
291 __ addptr(rsp, 4*wordSize); | |
0 | 292 |
293 // return | |
304 | 294 __ pop(rbp); |
0 | 295 __ ret(0); |
296 | |
297 // handle return types different from T_INT | |
298 __ BIND(is_long); | |
299 __ movl(Address(rdi, 0 * wordSize), rax); | |
300 __ movl(Address(rdi, 1 * wordSize), rdx); | |
301 __ jmp(exit); | |
302 | |
303 __ BIND(is_float); | |
304 // interpreter uses xmm0 for return values | |
305 if (UseSSE >= 1) { | |
306 __ movflt(Address(rdi, 0), xmm0); | |
307 } else { | |
308 __ fstp_s(Address(rdi, 0)); | |
309 } | |
310 __ jmp(exit); | |
311 | |
312 __ BIND(is_double); | |
313 // interpreter uses xmm0 for return values | |
314 if (UseSSE >= 2) { | |
315 __ movdbl(Address(rdi, 0), xmm0); | |
316 } else { | |
317 __ fstp_d(Address(rdi, 0)); | |
318 } | |
319 __ jmp(exit); | |
320 | |
321 return start; | |
322 } | |
323 | |
324 | |
325 //------------------------------------------------------------------------------------------------------------------------ | |
326 // Return point for a Java call if there's an exception thrown in Java code. | |
327 // The exception is caught and transformed into a pending exception stored in | |
328 // JavaThread that can be tested from within the VM. | |
329 // | |
330 // Note: Usually the parameters are removed by the callee. In case of an exception | |
331 // crossing an activation frame boundary, that is not the case if the callee | |
332 // is compiled code => need to setup the rsp. | |
333 // | |
334 // rax: exception oop | |
335 | |
336 address generate_catch_exception() { | |
337 StubCodeMark mark(this, "StubRoutines", "catch_exception"); | |
338 const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()! | |
339 const Address thread (rbp, 9 * wordSize); // same as in generate_call_stub()! | |
340 address start = __ pc(); | |
341 | |
342 // get thread directly | |
304 | 343 __ movptr(rcx, thread); |
0 | 344 #ifdef ASSERT |
345 // verify that threads correspond | |
346 { Label L; | |
347 __ get_thread(rbx); | |
304 | 348 __ cmpptr(rbx, rcx); |
0 | 349 __ jcc(Assembler::equal, L); |
350 __ stop("StubRoutines::catch_exception: threads must correspond"); | |
351 __ bind(L); | |
352 } | |
353 #endif | |
354 // set pending exception | |
355 __ verify_oop(rax); | |
304 | 356 __ movptr(Address(rcx, Thread::pending_exception_offset()), rax ); |
0 | 357 __ lea(Address(rcx, Thread::exception_file_offset ()), |
358 ExternalAddress((address)__FILE__)); | |
359 __ movl(Address(rcx, Thread::exception_line_offset ()), __LINE__ ); | |
360 // complete return to VM | |
361 assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); | |
362 __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address)); | |
363 | |
364 return start; | |
365 } | |
366 | |
367 | |
368 //------------------------------------------------------------------------------------------------------------------------ | |
369 // Continuation point for runtime calls returning with a pending exception. | |
370 // The pending exception check happened in the runtime or native call stub. | |
371 // The pending exception in Thread is converted into a Java-level exception. | |
372 // | |
373 // Contract with Java-level exception handlers: | |
1295 | 374 // rax: exception |
0 | 375 // rdx: throwing pc |
376 // | |
377 // NOTE: At entry of this stub, exception-pc must be on stack !! | |
378 | |
379 address generate_forward_exception() { | |
380 StubCodeMark mark(this, "StubRoutines", "forward exception"); | |
381 address start = __ pc(); | |
1295 | 382 const Register thread = rcx; |
383 | |
384 // other registers used in this stub | |
385 const Register exception_oop = rax; | |
386 const Register handler_addr = rbx; | |
387 const Register exception_pc = rdx; | |
0 | 388 |
389 // Upon entry, the sp points to the return address returning into Java | |
390 // (interpreted or compiled) code; i.e., the return address becomes the | |
391 // throwing pc. | |
392 // | |
393 // Arguments pushed before the runtime call are still on the stack but | |
394 // the exception handler will reset the stack pointer -> ignore them. | |
395 // A potential result in registers can be ignored as well. | |
396 | |
397 #ifdef ASSERT | |
398 // make sure this code is only executed if there is a pending exception | |
399 { Label L; | |
1295 | 400 __ get_thread(thread); |
401 __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); | |
0 | 402 __ jcc(Assembler::notEqual, L); |
403 __ stop("StubRoutines::forward exception: no pending exception (1)"); | |
404 __ bind(L); | |
405 } | |
406 #endif | |
407 | |
408 // compute exception handler into rbx, | |
1295 | 409 __ get_thread(thread); |
410 __ movptr(exception_pc, Address(rsp, 0)); | |
0 | 411 BLOCK_COMMENT("call exception_handler_for_return_address"); |
1295 | 412 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc); |
413 __ mov(handler_addr, rax); | |
0 | 414 |
1295 | 415 // setup rax & rdx, remove return address & clear pending exception |
416 __ get_thread(thread); | |
417 __ pop(exception_pc); | |
418 __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset())); | |
419 __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); | |
0 | 420 |
421 #ifdef ASSERT | |
422 // make sure exception is set | |
423 { Label L; | |
1295 | 424 __ testptr(exception_oop, exception_oop); |
0 | 425 __ jcc(Assembler::notEqual, L); |
426 __ stop("StubRoutines::forward exception: no pending exception (2)"); | |
427 __ bind(L); | |
428 } | |
429 #endif | |
430 | |
1295 | 431 // Verify that there is really a valid exception in RAX. |
432 __ verify_oop(exception_oop); | |
433 | |
0 | 434 // continue at exception handler (return address removed) |
1295 | 435 // rax: exception |
436 // rbx: exception handler | |
0 | 437 // rdx: throwing pc |
1295 | 438 __ jmp(handler_addr); |
0 | 439 |
440 return start; | |
441 } | |
442 | |
443 | |
444 //---------------------------------------------------------------------------------------------------- | |
445 // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest) | |
446 // | |
447 // xchg exists as far back as 8086, lock needed for MP only | |
448 // Stack layout immediately after call: | |
449 // | |
450 // 0 [ret addr ] <--- rsp | |
451 // 1 [ ex ] | |
452 // 2 [ dest ] | |
453 // | |
454 // Result: *dest <- ex, return (old *dest) | |
455 // | |
456 // Note: win32 does not currently use this code | |
457 | |
458 address generate_atomic_xchg() { | |
459 StubCodeMark mark(this, "StubRoutines", "atomic_xchg"); | |
460 address start = __ pc(); | |
461 | |
304 | 462 __ push(rdx); |
0 | 463 Address exchange(rsp, 2 * wordSize); |
464 Address dest_addr(rsp, 3 * wordSize); | |
465 __ movl(rax, exchange); | |
304 | 466 __ movptr(rdx, dest_addr); |
467 __ xchgl(rax, Address(rdx, 0)); | |
468 __ pop(rdx); | |
0 | 469 __ ret(0); |
470 | |
471 return start; | |
472 } | |
473 | |
474 //---------------------------------------------------------------------------------------------------- | |
475 // Support for void verify_mxcsr() | |
476 // | |
477 // This routine is used with -Xcheck:jni to verify that native | |
478 // JNI code does not return to Java code without restoring the | |
479 // MXCSR register to our expected state. | |
480 | |
481 | |
482 address generate_verify_mxcsr() { | |
483 StubCodeMark mark(this, "StubRoutines", "verify_mxcsr"); | |
484 address start = __ pc(); | |
485 | |
486 const Address mxcsr_save(rsp, 0); | |
487 | |
488 if (CheckJNICalls && UseSSE > 0 ) { | |
489 Label ok_ret; | |
490 ExternalAddress mxcsr_std(StubRoutines::addr_mxcsr_std()); | |
304 | 491 __ push(rax); |
492 __ subptr(rsp, wordSize); // allocate a temp location | |
0 | 493 __ stmxcsr(mxcsr_save); |
494 __ movl(rax, mxcsr_save); | |
495 __ andl(rax, MXCSR_MASK); | |
496 __ cmp32(rax, mxcsr_std); | |
497 __ jcc(Assembler::equal, ok_ret); | |
498 | |
499 __ warn("MXCSR changed by native JNI code."); | |
500 | |
501 __ ldmxcsr(mxcsr_std); | |
502 | |
503 __ bind(ok_ret); | |
304 | 504 __ addptr(rsp, wordSize); |
505 __ pop(rax); | |
0 | 506 } |
507 | |
508 __ ret(0); | |
509 | |
510 return start; | |
511 } | |
512 | |
513 | |
514 //--------------------------------------------------------------------------- | |
515 // Support for void verify_fpu_cntrl_wrd() | |
516 // | |
517 // This routine is used with -Xcheck:jni to verify that native | |
518 // JNI code does not return to Java code without restoring the | |
519 // FP control word to our expected state. | |
520 | |
521 address generate_verify_fpu_cntrl_wrd() { | |
522 StubCodeMark mark(this, "StubRoutines", "verify_spcw"); | |
523 address start = __ pc(); | |
524 | |
525 const Address fpu_cntrl_wrd_save(rsp, 0); | |
526 | |
527 if (CheckJNICalls) { | |
528 Label ok_ret; | |
304 | 529 __ push(rax); |
530 __ subptr(rsp, wordSize); // allocate a temp location | |
0 | 531 __ fnstcw(fpu_cntrl_wrd_save); |
532 __ movl(rax, fpu_cntrl_wrd_save); | |
533 __ andl(rax, FPU_CNTRL_WRD_MASK); | |
534 ExternalAddress fpu_std(StubRoutines::addr_fpu_cntrl_wrd_std()); | |
535 __ cmp32(rax, fpu_std); | |
536 __ jcc(Assembler::equal, ok_ret); | |
537 | |
538 __ warn("Floating point control word changed by native JNI code."); | |
539 | |
540 __ fldcw(fpu_std); | |
541 | |
542 __ bind(ok_ret); | |
304 | 543 __ addptr(rsp, wordSize); |
544 __ pop(rax); | |
0 | 545 } |
546 | |
547 __ ret(0); | |
548 | |
549 return start; | |
550 } | |
551 | |
552 //--------------------------------------------------------------------------- | |
553 // Wrapper for slow-case handling of double-to-integer conversion | |
554 // d2i or f2i fast case failed either because it is nan or because | |
555 // of under/overflow. | |
556 // Input: FPU TOS: float value | |
557 // Output: rax, (rdx): integer (long) result | |
558 | |
559 address generate_d2i_wrapper(BasicType t, address fcn) { | |
560 StubCodeMark mark(this, "StubRoutines", "d2i_wrapper"); | |
561 address start = __ pc(); | |
562 | |
563 // Capture info about frame layout | |
564 enum layout { FPUState_off = 0, | |
565 rbp_off = FPUStateSizeInWords, | |
566 rdi_off, | |
567 rsi_off, | |
568 rcx_off, | |
569 rbx_off, | |
570 saved_argument_off, | |
571 saved_argument_off2, // 2nd half of double | |
572 framesize | |
573 }; | |
574 | |
575 assert(FPUStateSizeInWords == 27, "update stack layout"); | |
576 | |
577 // Save outgoing argument to stack across push_FPU_state() | |
304 | 578 __ subptr(rsp, wordSize * 2); |
0 | 579 __ fstp_d(Address(rsp, 0)); |
580 | |
581 // Save CPU & FPU state | |
304 | 582 __ push(rbx); |
583 __ push(rcx); | |
584 __ push(rsi); | |
585 __ push(rdi); | |
586 __ push(rbp); | |
0 | 587 __ push_FPU_state(); |
588 | |
589 // push_FPU_state() resets the FP top of stack | |
590 // Load original double into FP top of stack | |
591 __ fld_d(Address(rsp, saved_argument_off * wordSize)); | |
592 // Store double into stack as outgoing argument | |
304 | 593 __ subptr(rsp, wordSize*2); |
0 | 594 __ fst_d(Address(rsp, 0)); |
595 | |
596 // Prepare FPU for doing math in C-land | |
597 __ empty_FPU_stack(); | |
598 // Call the C code to massage the double. Result in EAX | |
599 if (t == T_INT) | |
600 { BLOCK_COMMENT("SharedRuntime::d2i"); } | |
601 else if (t == T_LONG) | |
602 { BLOCK_COMMENT("SharedRuntime::d2l"); } | |
603 __ call_VM_leaf( fcn, 2 ); | |
604 | |
605 // Restore CPU & FPU state | |
606 __ pop_FPU_state(); | |
304 | 607 __ pop(rbp); |
608 __ pop(rdi); | |
609 __ pop(rsi); | |
610 __ pop(rcx); | |
611 __ pop(rbx); | |
612 __ addptr(rsp, wordSize * 2); | |
0 | 613 |
614 __ ret(0); | |
615 | |
616 return start; | |
617 } | |
618 | |
619 | |
620 //--------------------------------------------------------------------------- | |
621 // The following routine generates a subroutine to throw an asynchronous | |
622 // UnknownError when an unsafe access gets a fault that could not be | |
623 // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.) | |
624 address generate_handler_for_unsafe_access() { | |
625 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); | |
626 address start = __ pc(); | |
627 | |
304 | 628 __ push(0); // hole for return address-to-be |
629 __ pusha(); // push registers | |
0 | 630 Address next_pc(rsp, RegisterImpl::number_of_registers * BytesPerWord); |
631 BLOCK_COMMENT("call handle_unsafe_access"); | |
632 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, handle_unsafe_access))); | |
304 | 633 __ movptr(next_pc, rax); // stuff next address |
634 __ popa(); | |
0 | 635 __ ret(0); // jump to next address |
636 | |
637 return start; | |
638 } | |
639 | |
640 | |
641 //---------------------------------------------------------------------------------------------------- | |
642 // Non-destructive plausibility checks for oops | |
643 | |
644 address generate_verify_oop() { | |
645 StubCodeMark mark(this, "StubRoutines", "verify_oop"); | |
646 address start = __ pc(); | |
647 | |
648 // Incoming arguments on stack after saving rax,: | |
649 // | |
650 // [tos ]: saved rdx | |
651 // [tos + 1]: saved EFLAGS | |
652 // [tos + 2]: return address | |
653 // [tos + 3]: char* error message | |
654 // [tos + 4]: oop object to verify | |
655 // [tos + 5]: saved rax, - saved by caller and bashed | |
656 | |
657 Label exit, error; | |
304 | 658 __ pushf(); |
659 __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr())); | |
660 __ push(rdx); // save rdx | |
0 | 661 // make sure object is 'reasonable' |
304 | 662 __ movptr(rax, Address(rsp, 4 * wordSize)); // get object |
663 __ testptr(rax, rax); | |
0 | 664 __ jcc(Assembler::zero, exit); // if obj is NULL it is ok |
665 | |
666 // Check if the oop is in the right area of memory | |
667 const int oop_mask = Universe::verify_oop_mask(); | |
668 const int oop_bits = Universe::verify_oop_bits(); | |
304 | 669 __ mov(rdx, rax); |
670 __ andptr(rdx, oop_mask); | |
671 __ cmpptr(rdx, oop_bits); | |
0 | 672 __ jcc(Assembler::notZero, error); |
673 | |
6725
da91efe96a93
6964458: Reimplement class meta-data storage to use native memory
coleenp
parents:
6266
diff
changeset
|
674 // make sure klass is 'reasonable', which is not zero. |
304 | 675 __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass |
676 __ testptr(rax, rax); | |
0 | 677 __ jcc(Assembler::zero, error); // if klass is NULL it is broken |
678 | |
679 // return if everything seems ok | |
680 __ bind(exit); | |
304 | 681 __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back |
682 __ pop(rdx); // restore rdx | |
683 __ popf(); // restore EFLAGS | |
0 | 684 __ ret(3 * wordSize); // pop arguments |
685 | |
686 // handle errors | |
687 __ bind(error); | |
304 | 688 __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back |
689 __ pop(rdx); // get saved rdx back | |
690 __ popf(); // get saved EFLAGS off stack -- will be ignored | |
691 __ pusha(); // push registers (eip = return address & msg are already pushed) | |
0 | 692 BLOCK_COMMENT("call MacroAssembler::debug"); |
304 | 693 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); |
694 __ popa(); | |
0 | 695 __ ret(3 * wordSize); // pop arguments |
696 return start; | |
697 } | |
698 | |
699 // | |
700 // Generate pre-barrier for array stores | |
701 // | |
702 // Input: | |
703 // start - starting address | |
845
df6caf649ff7
6700789: G1: Enable use of compressed oops with G1 heaps
ysr
parents:
710
diff
changeset
|
704 // count - element count |
2324 | 705 void gen_write_ref_array_pre_barrier(Register start, Register count, bool uninitialized_target) { |
0 | 706 assert_different_registers(start, count); |
707 BarrierSet* bs = Universe::heap()->barrier_set(); | |
708 switch (bs->kind()) { | |
709 case BarrierSet::G1SATBCT: | |
710 case BarrierSet::G1SATBCTLogging: | |
2324 | 711 // With G1, don't generate the call if we statically know that the target in uninitialized |
712 if (!uninitialized_target) { | |
713 __ pusha(); // push registers | |
714 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), | |
715 start, count); | |
716 __ popa(); | |
717 } | |
0 | 718 break; |
719 case BarrierSet::CardTableModRef: | |
720 case BarrierSet::CardTableExtension: | |
721 case BarrierSet::ModRef: | |
722 break; | |
723 default : | |
724 ShouldNotReachHere(); | |
725 | |
726 } | |
727 } | |
728 | |
729 | |
730 // | |
731 // Generate a post-barrier for an array store | |
732 // | |
733 // start - starting address | |
734 // count - element count | |
735 // | |
736 // The two input registers are overwritten. | |
737 // | |
738 void gen_write_ref_array_post_barrier(Register start, Register count) { | |
739 BarrierSet* bs = Universe::heap()->barrier_set(); | |
740 assert_different_registers(start, count); | |
741 switch (bs->kind()) { | |
742 case BarrierSet::G1SATBCT: | |
743 case BarrierSet::G1SATBCTLogging: | |
744 { | |
304 | 745 __ pusha(); // push registers |
1192
776fb94f33cc
6918006: G1: spill space must be reserved on the stack for barrier calls on Windows x64
apetrusenko
parents:
1174
diff
changeset
|
746 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), |
776fb94f33cc
6918006: G1: spill space must be reserved on the stack for barrier calls on Windows x64
apetrusenko
parents:
1174
diff
changeset
|
747 start, count); |
304 | 748 __ popa(); |
0 | 749 } |
750 break; | |
751 | |
752 case BarrierSet::CardTableModRef: | |
753 case BarrierSet::CardTableExtension: | |
754 { | |
755 CardTableModRefBS* ct = (CardTableModRefBS*)bs; | |
756 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); | |
757 | |
758 Label L_loop; | |
759 const Register end = count; // elements count; end == start+count-1 | |
760 assert_different_registers(start, end); | |
761 | |
304 | 762 __ lea(end, Address(start, count, Address::times_ptr, -wordSize)); |
763 __ shrptr(start, CardTableModRefBS::card_shift); | |
764 __ shrptr(end, CardTableModRefBS::card_shift); | |
765 __ subptr(end, start); // end --> count | |
0 | 766 __ BIND(L_loop); |
249
910a4cb98e9e
6717457: Internal Error (src/share/vm/code/relocInfo.hpp:1089)
never
parents:
19
diff
changeset
|
767 intptr_t disp = (intptr_t) ct->byte_map_base; |
910a4cb98e9e
6717457: Internal Error (src/share/vm/code/relocInfo.hpp:1089)
never
parents:
19
diff
changeset
|
768 Address cardtable(start, count, Address::times_1, disp); |
910a4cb98e9e
6717457: Internal Error (src/share/vm/code/relocInfo.hpp:1089)
never
parents:
19
diff
changeset
|
769 __ movb(cardtable, 0); |
0 | 770 __ decrement(count); |
771 __ jcc(Assembler::greaterEqual, L_loop); | |
772 } | |
773 break; | |
774 case BarrierSet::ModRef: | |
775 break; | |
776 default : | |
777 ShouldNotReachHere(); | |
778 | |
779 } | |
780 } | |
781 | |
405 | 782 |
783 // Copy 64 bytes chunks | |
784 // | |
785 // Inputs: | |
786 // from - source array address | |
787 // to_from - destination array address - from | |
788 // qword_count - 8-bytes element count, non-negative (clobbered) | |
789 // | |
790 void xmm_copy_forward(Register from, Register to_from, Register qword_count) { | |
791 assert( UseSSE >= 2, "supported cpu only" ); | |
792 Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; | |
793 // Copy 64-byte chunks | |
794 __ jmpb(L_copy_64_bytes); | |
1365 | 795 __ align(OptoLoopAlignment); |
405 | 796 __ BIND(L_copy_64_bytes_loop); |
797 | |
7475
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
798 if (UseUnalignedLoadStores) { |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
799 if (UseAVX >= 2) { |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
800 __ vmovdqu(xmm0, Address(from, 0)); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
801 __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
802 __ vmovdqu(xmm1, Address(from, 32)); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
803 __ vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
804 } else { |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
805 __ movdqu(xmm0, Address(from, 0)); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
806 __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
807 __ movdqu(xmm1, Address(from, 16)); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
808 __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
809 __ movdqu(xmm2, Address(from, 32)); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
810 __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
811 __ movdqu(xmm3, Address(from, 48)); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
812 __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); |
e2e6bf86682c
8005544: Use 256bit YMM registers in arraycopy stubs on x86
kvn
parents:
7427
diff
changeset
|
813 } |
405 | 814 } else { |
815 __ movq(xmm0, Address(from, 0)); | |
816 __ movq(Address(from, to_from, Address::times_1, 0), xmm0); | |
817 __ movq(xmm1, Address(from, 8)); | |
818 __ movq(Address(from, to_from, Address::times_1, 8), xmm1); | |
819 __ movq(xmm2, Address(from, 16)); | |
820 __ movq(Address(from, to_from, Address::times_1, 16), xmm2); | |
821 __ movq(xmm3, Address(from, 24)); | |
822 __ movq(Address(from, to_from, Address::times_1, 24), xmm3); | |
823 __ movq(xmm4, Address(from, 32)); | |
824 __ movq(Address(from, to_from, Address::times_1, 32), xmm4); | |
825 __ movq(xmm5, Address(from, 40)); | |
826 __ movq(Address(from, to_from, Address::times_1, 40), xmm5); | |
827 __ movq(xmm6, Address(from, 48)); | |
828 __ movq(Address(from, to_from, Address::times_1, 48), xmm6); | |
829 __ movq(xmm7, Address(from, 56)); | |
830 __ movq(Address(from, to_from, Address::times_1, 56), xmm7); | |
831 } | |
832 | |
833 __ addl(from, 64); | |
834 __ BIND(L_copy_64_bytes); | |
835 __ subl(qword_count, 8); | |
836 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); | |
8873
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
7475
diff
changeset
|
837 |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
7475
diff
changeset
|
838 if (UseUnalignedLoadStores && (UseAVX >= 2)) { |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
7475
diff
changeset
|
839 // clean upper bits of YMM registers |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
7475
diff
changeset
|
840 __ vzeroupper(); |
e961c11b85fe
8011102: Clear AVX registers after return from JNI call
kvn
parents:
7475
diff
changeset
|
841 } |
405 | 842 __ addl(qword_count, 8); |
843 __ jccb(Assembler::zero, L_exit); | |
844 // | |
845 // length is too short, just copy qwords | |
846 // | |
847 __ BIND(L_copy_8_bytes); | |
848 __ movq(xmm0, Address(from, 0)); | |
849 __ movq(Address(from, to_from, Address::times_1), xmm0); | |
850 __ addl(from, 8); | |
851 __ decrement(qword_count); | |
852 __ jcc(Assembler::greater, L_copy_8_bytes); | |
853 __ BIND(L_exit); | |
854 } | |
855 | |
0 | 856 // Copy 64 bytes chunks |
857 // | |
858 // Inputs: | |
859 // from - source array address | |
860 // to_from - destination array address - from | |
861 // qword_count - 8-bytes element count, non-negative (clobbered) | |
862 // | |
863 void mmx_copy_forward(Register from, Register to_from, Register qword_count) { | |
405 | 864 assert( VM_Version::supports_mmx(), "supported cpu only" ); |
0 | 865 Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; |
866 // Copy 64-byte chunks | |
867 __ jmpb(L_copy_64_bytes); | |
1365 | 868 __ align(OptoLoopAlignment); |
0 | 869 __ BIND(L_copy_64_bytes_loop); |
870 __ movq(mmx0, Address(from, 0)); | |
871 __ movq(mmx1, Address(from, 8)); | |
872 __ movq(mmx2, Address(from, 16)); | |
873 __ movq(Address(from, to_from, Address::times_1, 0), mmx0); | |
874 __ movq(mmx3, Address(from, 24)); | |
875 __ movq(Address(from, to_from, Address::times_1, 8), mmx1); | |
876 __ movq(mmx4, Address(from, 32)); | |
877 __ movq(Address(from, to_from, Address::times_1, 16), mmx2); | |
878 __ movq(mmx5, Address(from, 40)); | |
879 __ movq(Address(from, to_from, Address::times_1, 24), mmx3); | |
880 __ movq(mmx6, Address(from, 48)); | |
881 __ movq(Address(from, to_from, Address::times_1, 32), mmx4); | |
882 __ movq(mmx7, Address(from, 56)); | |
883 __ movq(Address(from, to_from, Address::times_1, 40), mmx5); | |
884 __ movq(Address(from, to_from, Address::times_1, 48), mmx6); | |
885 __ movq(Address(from, to_from, Address::times_1, 56), mmx7); | |
304 | 886 __ addptr(from, 64); |
0 | 887 __ BIND(L_copy_64_bytes); |
888 __ subl(qword_count, 8); | |
889 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); | |
890 __ addl(qword_count, 8); | |
891 __ jccb(Assembler::zero, L_exit); | |
892 // | |
893 // length is too short, just copy qwords | |
894 // | |
895 __ BIND(L_copy_8_bytes); | |
896 __ movq(mmx0, Address(from, 0)); | |
897 __ movq(Address(from, to_from, Address::times_1), mmx0); | |
304 | 898 __ addptr(from, 8); |
0 | 899 __ decrement(qword_count); |
900 __ jcc(Assembler::greater, L_copy_8_bytes); | |
901 __ BIND(L_exit); | |
902 __ emms(); | |
903 } | |
904 | |
905 address generate_disjoint_copy(BasicType t, bool aligned, | |
906 Address::ScaleFactor sf, | |
2324 | 907 address* entry, const char *name, |
908 bool dest_uninitialized = false) { | |
0 | 909 __ align(CodeEntryAlignment); |
910 StubCodeMark mark(this, "StubRoutines", name); | |
911 address start = __ pc(); | |
912 | |
913 Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte; | |
914 Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes; | |
915 | |
304 | 916 int shift = Address::times_ptr - sf; |
0 | 917 |
918 const Register from = rsi; // source array address | |
919 const Register to = rdi; // destination array address | |
920 const Register count = rcx; // elements count | |
921 const Register to_from = to; // (to - from) | |
922 const Register saved_to = rdx; // saved destination array address | |
923 | |
924 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
304 | 925 __ push(rsi); |
926 __ push(rdi); | |
927 __ movptr(from , Address(rsp, 12+ 4)); | |
928 __ movptr(to , Address(rsp, 12+ 8)); | |
0 | 929 __ movl(count, Address(rsp, 12+ 12)); |
2313
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
930 |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
931 if (entry != NULL) { |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
932 *entry = __ pc(); // Entry point from conjoint arraycopy stub. |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
933 BLOCK_COMMENT("Entry:"); |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
934 } |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
935 |
0 | 936 if (t == T_OBJECT) { |
937 __ testl(count, count); | |
938 __ jcc(Assembler::zero, L_0_count); | |
2324 | 939 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); |
304 | 940 __ mov(saved_to, to); // save 'to' |
0 | 941 } |
942 | |
304 | 943 __ subptr(to, from); // to --> to_from |
0 | 944 __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element |
945 __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp | |
405 | 946 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { |
0 | 947 // align source address at 4 bytes address boundary |
948 if (t == T_BYTE) { | |
949 // One byte misalignment happens only for byte arrays | |
950 __ testl(from, 1); | |
951 __ jccb(Assembler::zero, L_skip_align1); | |
952 __ movb(rax, Address(from, 0)); | |
953 __ movb(Address(from, to_from, Address::times_1, 0), rax); | |
954 __ increment(from); | |
955 __ decrement(count); | |
956 __ BIND(L_skip_align1); | |
957 } | |
958 // Two bytes misalignment happens only for byte and short (char) arrays | |
959 __ testl(from, 2); | |
960 __ jccb(Assembler::zero, L_skip_align2); | |
961 __ movw(rax, Address(from, 0)); | |
962 __ movw(Address(from, to_from, Address::times_1, 0), rax); | |
304 | 963 __ addptr(from, 2); |
0 | 964 __ subl(count, 1<<(shift-1)); |
965 __ BIND(L_skip_align2); | |
966 } | |
967 if (!VM_Version::supports_mmx()) { | |
304 | 968 __ mov(rax, count); // save 'count' |
969 __ shrl(count, shift); // bytes count | |
970 __ addptr(to_from, from);// restore 'to' | |
971 __ rep_mov(); | |
972 __ subptr(to_from, from);// restore 'to_from' | |
973 __ mov(count, rax); // restore 'count' | |
0 | 974 __ jmpb(L_copy_2_bytes); // all dwords were copied |
975 } else { | |
405 | 976 if (!UseUnalignedLoadStores) { |
977 // align to 8 bytes, we know we are 4 byte aligned to start | |
978 __ testptr(from, 4); | |
979 __ jccb(Assembler::zero, L_copy_64_bytes); | |
980 __ movl(rax, Address(from, 0)); | |
981 __ movl(Address(from, to_from, Address::times_1, 0), rax); | |
982 __ addptr(from, 4); | |
983 __ subl(count, 1<<shift); | |
984 } | |
0 | 985 __ BIND(L_copy_64_bytes); |
304 | 986 __ mov(rax, count); |
0 | 987 __ shrl(rax, shift+1); // 8 bytes chunk count |
988 // | |
989 // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop | |
990 // | |
405 | 991 if (UseXMMForArrayCopy) { |
992 xmm_copy_forward(from, to_from, rax); | |
993 } else { | |
994 mmx_copy_forward(from, to_from, rax); | |
995 } | |
0 | 996 } |
997 // copy tailing dword | |
998 __ BIND(L_copy_4_bytes); | |
999 __ testl(count, 1<<shift); | |
1000 __ jccb(Assembler::zero, L_copy_2_bytes); | |
1001 __ movl(rax, Address(from, 0)); | |
1002 __ movl(Address(from, to_from, Address::times_1, 0), rax); | |
1003 if (t == T_BYTE || t == T_SHORT) { | |
304 | 1004 __ addptr(from, 4); |
0 | 1005 __ BIND(L_copy_2_bytes); |
1006 // copy tailing word | |
1007 __ testl(count, 1<<(shift-1)); | |
1008 __ jccb(Assembler::zero, L_copy_byte); | |
1009 __ movw(rax, Address(from, 0)); | |
1010 __ movw(Address(from, to_from, Address::times_1, 0), rax); | |
1011 if (t == T_BYTE) { | |
304 | 1012 __ addptr(from, 2); |
0 | 1013 __ BIND(L_copy_byte); |
1014 // copy tailing byte | |
1015 __ testl(count, 1); | |
1016 __ jccb(Assembler::zero, L_exit); | |
1017 __ movb(rax, Address(from, 0)); | |
1018 __ movb(Address(from, to_from, Address::times_1, 0), rax); | |
1019 __ BIND(L_exit); | |
1020 } else { | |
1021 __ BIND(L_copy_byte); | |
1022 } | |
1023 } else { | |
1024 __ BIND(L_copy_2_bytes); | |
1025 } | |
1026 | |
1027 if (t == T_OBJECT) { | |
1028 __ movl(count, Address(rsp, 12+12)); // reread 'count' | |
304 | 1029 __ mov(to, saved_to); // restore 'to' |
0 | 1030 gen_write_ref_array_post_barrier(to, count); |
1031 __ BIND(L_0_count); | |
1032 } | |
1033 inc_copy_counter_np(t); | |
304 | 1034 __ pop(rdi); |
1035 __ pop(rsi); | |
0 | 1036 __ leave(); // required for proper stackwalking of RuntimeStub frame |
304 | 1037 __ xorptr(rax, rax); // return 0 |
0 | 1038 __ ret(0); |
1039 return start; | |
1040 } | |
1041 | |
1042 | |
1763 | 1043 address generate_fill(BasicType t, bool aligned, const char *name) { |
1044 __ align(CodeEntryAlignment); | |
1045 StubCodeMark mark(this, "StubRoutines", name); | |
1046 address start = __ pc(); | |
1047 | |
1048 BLOCK_COMMENT("Entry:"); | |
1049 | |
1050 const Register to = rdi; // source array address | |
1051 const Register value = rdx; // value | |
1052 const Register count = rsi; // elements count | |
1053 | |
1054 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
1055 __ push(rsi); | |
1056 __ push(rdi); | |
1057 __ movptr(to , Address(rsp, 12+ 4)); | |
1058 __ movl(value, Address(rsp, 12+ 8)); | |
1059 __ movl(count, Address(rsp, 12+ 12)); | |
1060 | |
1061 __ generate_fill(t, aligned, to, value, count, rax, xmm0); | |
1062 | |
1063 __ pop(rdi); | |
1064 __ pop(rsi); | |
1065 __ leave(); // required for proper stackwalking of RuntimeStub frame | |
1066 __ ret(0); | |
1067 return start; | |
1068 } | |
1069 | |
0 | 1070 address generate_conjoint_copy(BasicType t, bool aligned, |
1071 Address::ScaleFactor sf, | |
1072 address nooverlap_target, | |
2324 | 1073 address* entry, const char *name, |
1074 bool dest_uninitialized = false) { | |
0 | 1075 __ align(CodeEntryAlignment); |
1076 StubCodeMark mark(this, "StubRoutines", name); | |
1077 address start = __ pc(); | |
1078 | |
1079 Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte; | |
1080 Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop; | |
1081 | |
304 | 1082 int shift = Address::times_ptr - sf; |
0 | 1083 |
1084 const Register src = rax; // source array address | |
1085 const Register dst = rdx; // destination array address | |
1086 const Register from = rsi; // source array address | |
1087 const Register to = rdi; // destination array address | |
1088 const Register count = rcx; // elements count | |
1089 const Register end = rax; // array end address | |
1090 | |
1091 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
304 | 1092 __ push(rsi); |
1093 __ push(rdi); | |
1094 __ movptr(src , Address(rsp, 12+ 4)); // from | |
1095 __ movptr(dst , Address(rsp, 12+ 8)); // to | |
1096 __ movl2ptr(count, Address(rsp, 12+12)); // count | |
0 | 1097 |
1098 if (entry != NULL) { | |
1099 *entry = __ pc(); // Entry point from generic arraycopy stub. | |
1100 BLOCK_COMMENT("Entry:"); | |
1101 } | |
1102 | |
2313
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
1103 // nooverlap_target expects arguments in rsi and rdi. |
304 | 1104 __ mov(from, src); |
1105 __ mov(to , dst); | |
0 | 1106 |
2313
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
1107 // arrays overlap test: dispatch to disjoint stub if necessary. |
0 | 1108 RuntimeAddress nooverlap(nooverlap_target); |
304 | 1109 __ cmpptr(dst, src); |
1110 __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size | |
0 | 1111 __ jump_cc(Assembler::belowEqual, nooverlap); |
304 | 1112 __ cmpptr(dst, end); |
0 | 1113 __ jump_cc(Assembler::aboveEqual, nooverlap); |
1114 | |
2313
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
1115 if (t == T_OBJECT) { |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
1116 __ testl(count, count); |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
1117 __ jcc(Assembler::zero, L_0_count); |
2324 | 1118 gen_write_ref_array_pre_barrier(dst, count, dest_uninitialized); |
2313
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
1119 } |
d89a22843c62
7020521: arraycopy stubs place prebarriers incorrectly
iveresov
parents:
2245
diff
changeset
|
1120 |
0 | 1121 // copy from high to low |
1122 __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element | |
1123 __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp | |
1124 if (t == T_BYTE || t == T_SHORT) { | |
1125 // Align the end of destination array at 4 bytes address boundary | |
304 | 1126 __ lea(end, Address(dst, count, sf, 0)); |
0 | 1127 if (t == T_BYTE) { |
1128 // One byte misalignment happens only for byte arrays | |
1129 __ testl(end, 1); | |
1130 __ jccb(Assembler::zero, L_skip_align1); | |
1131 __ decrement(count); | |
1132 __ movb(rdx, Address(from, count, sf, 0)); | |
1133 __ movb(Address(to, count, sf, 0), rdx); | |
1134 __ BIND(L_skip_align1); | |
1135 } | |
1136 // Two bytes misalignment happens only for byte and short (char) arrays | |
1137 __ testl(end, 2); | |
1138 __ jccb(Assembler::zero, L_skip_align2); | |
304 | 1139 __ subptr(count, 1<<(shift-1)); |
0 | 1140 __ movw(rdx, Address(from, count, sf, 0)); |
1141 __ movw(Address(to, count, sf, 0), rdx); | |
1142 __ BIND(L_skip_align2); | |
1143 __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element | |
1144 __ jcc(Assembler::below, L_copy_4_bytes); | |
1145 } | |
1146 | |
1147 if (!VM_Version::supports_mmx()) { | |
1148 __ std(); | |
304 | 1149 __ mov(rax, count); // Save 'count' |
1150 __ mov(rdx, to); // Save 'to' | |
1151 __ lea(rsi, Address(from, count, sf, -4)); | |
1152 __ lea(rdi, Address(to , count, sf, -4)); | |
1153 __ shrptr(count, shift); // bytes count | |
1154 __ rep_mov(); | |
0 | 1155 __ cld(); |
304 | 1156 __ mov(count, rax); // restore 'count' |
0 | 1157 __ andl(count, (1<<shift)-1); // mask the number of rest elements |
304 | 1158 __ movptr(from, Address(rsp, 12+4)); // reread 'from' |
1159 __ mov(to, rdx); // restore 'to' | |
0 | 1160 __ jmpb(L_copy_2_bytes); // all dword were copied |
1161 } else { | |
1162 // Align to 8 bytes the end of array. It is aligned to 4 bytes already. | |
304 | 1163 __ testptr(end, 4); |
0 | 1164 __ jccb(Assembler::zero, L_copy_8_bytes); |
1165 __ subl(count, 1<<shift); | |
1166 __ movl(rdx, Address(from, count, sf, 0)); | |
1167 __ movl(Address(to, count, sf, 0), rdx); | |
1168 __ jmpb(L_copy_8_bytes); | |
1169 | |
1365 | 1170 __ align(OptoLoopAlignment); |
0 | 1171 // Move 8 bytes |
1172 __ BIND(L_copy_8_bytes_loop); | |
405 | 1173 if (UseXMMForArrayCopy) { |
1174 __ movq(xmm0, Address(from, count, sf, 0)); | |
1175 __ movq(Address(to, count, sf, 0), xmm0); | |
1176 } else { | |
1177 __ movq(mmx0, Address(from, count, sf, 0)); | |
1178 __ movq(Address(to, count, sf, 0), mmx0); | |
1179 } | |
0 | 1180 __ BIND(L_copy_8_bytes); |
1181 __ subl(count, 2<<shift); | |
1182 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); | |
1183 __ addl(count, 2<<shift); | |
405 | 1184 if (!UseXMMForArrayCopy) { |
1185 __ emms(); | |
1186 } | |
0 | 1187 } |
1188 __ BIND(L_copy_4_bytes); | |
1189 // copy prefix qword | |
1190 __ testl(count, 1<<shift); | |
1191 __ jccb(Assembler::zero, L_copy_2_bytes); | |
1192 __ movl(rdx, Address(from, count, sf, -4)); | |
1193 __ movl(Address(to, count, sf, -4), rdx); | |
1194 | |
1195 if (t == T_BYTE || t == T_SHORT) { | |
1196 __ subl(count, (1<<shift)); | |
1197 __ BIND(L_copy_2_bytes); | |
1198 // copy prefix dword | |
1199 __ testl(count, 1<<(shift-1)); | |
1200 __ jccb(Assembler::zero, L_copy_byte); | |
1201 __ movw(rdx, Address(from, count, sf, -2)); | |
1202 __ movw(Address(to, count, sf, -2), rdx); | |
1203 if (t == T_BYTE) { | |
1204 __ subl(count, 1<<(shift-1)); | |
1205 __ BIND(L_copy_byte); | |
1206 // copy prefix byte | |
1207 __ testl(count, 1); | |
1208 __ jccb(Assembler::zero, L_exit); | |
1209 __ movb(rdx, Address(from, 0)); | |
1210 __ movb(Address(to, 0), rdx); | |
1211 __ BIND(L_exit); | |
1212 } else { | |
1213 __ BIND(L_copy_byte); | |
1214 } | |
1215 } else { | |
1216 __ BIND(L_copy_2_bytes); | |
1217 } | |
1218 if (t == T_OBJECT) { | |
304 | 1219 __ movl2ptr(count, Address(rsp, 12+12)); // reread count |
0 | 1220 gen_write_ref_array_post_barrier(to, count); |
1221 __ BIND(L_0_count); | |
1222 } | |
1223 inc_copy_counter_np(t); | |
304 | 1224 __ pop(rdi); |
1225 __ pop(rsi); | |
0 | 1226 __ leave(); // required for proper stackwalking of RuntimeStub frame |
304 | 1227 __ xorptr(rax, rax); // return 0 |
0 | 1228 __ ret(0); |
1229 return start; | |
1230 } | |
1231 | |
1232 | |
1233 address generate_disjoint_long_copy(address* entry, const char *name) { | |
1234 __ align(CodeEntryAlignment); | |
1235 StubCodeMark mark(this, "StubRoutines", name); | |
1236 address start = __ pc(); | |
1237 | |
1238 Label L_copy_8_bytes, L_copy_8_bytes_loop; | |
1239 const Register from = rax; // source array address | |
1240 const Register to = rdx; // destination array address | |
1241 const Register count = rcx; // elements count | |
1242 const Register to_from = rdx; // (to - from) | |
1243 | |
1244 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
304 | 1245 __ movptr(from , Address(rsp, 8+0)); // from |
1246 __ movptr(to , Address(rsp, 8+4)); // to | |
1247 __ movl2ptr(count, Address(rsp, 8+8)); // count | |
0 | 1248 |
1249 *entry = __ pc(); // Entry point from conjoint arraycopy stub. | |
1250 BLOCK_COMMENT("Entry:"); | |
1251 | |
304 | 1252 __ subptr(to, from); // to --> to_from |
0 | 1253 if (VM_Version::supports_mmx()) { |
405 | 1254 if (UseXMMForArrayCopy) { |
1255 xmm_copy_forward(from, to_from, count); | |
1256 } else { | |
1257 mmx_copy_forward(from, to_from, count); | |
1258 } | |
0 | 1259 } else { |
1260 __ jmpb(L_copy_8_bytes); | |
1365 | 1261 __ align(OptoLoopAlignment); |
0 | 1262 __ BIND(L_copy_8_bytes_loop); |
1263 __ fild_d(Address(from, 0)); | |
1264 __ fistp_d(Address(from, to_from, Address::times_1)); | |
304 | 1265 __ addptr(from, 8); |
0 | 1266 __ BIND(L_copy_8_bytes); |
1267 __ decrement(count); | |
1268 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); | |
1269 } | |
1270 inc_copy_counter_np(T_LONG); | |
1271 __ leave(); // required for proper stackwalking of RuntimeStub frame | |
304 | 1272 __ xorptr(rax, rax); // return 0 |
0 | 1273 __ ret(0); |
1274 return start; | |
1275 } | |
1276 | |
1277 address generate_conjoint_long_copy(address nooverlap_target, | |
1278 address* entry, const char *name) { | |
1279 __ align(CodeEntryAlignment); | |
1280 StubCodeMark mark(this, "StubRoutines", name); | |
1281 address start = __ pc(); | |
1282 | |
1283 Label L_copy_8_bytes, L_copy_8_bytes_loop; | |
1284 const Register from = rax; // source array address | |
1285 const Register to = rdx; // destination array address | |
1286 const Register count = rcx; // elements count | |
1287 const Register end_from = rax; // source array end address | |
1288 | |
1289 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
304 | 1290 __ movptr(from , Address(rsp, 8+0)); // from |
1291 __ movptr(to , Address(rsp, 8+4)); // to | |
1292 __ movl2ptr(count, Address(rsp, 8+8)); // count | |
0 | 1293 |
1294 *entry = __ pc(); // Entry point from generic arraycopy stub. | |
1295 BLOCK_COMMENT("Entry:"); | |
1296 | |
1297 // arrays overlap test | |
304 | 1298 __ cmpptr(to, from); |
0 | 1299 RuntimeAddress nooverlap(nooverlap_target); |
1300 __ jump_cc(Assembler::belowEqual, nooverlap); | |
304 | 1301 __ lea(end_from, Address(from, count, Address::times_8, 0)); |
1302 __ cmpptr(to, end_from); | |
1303 __ movptr(from, Address(rsp, 8)); // from | |
0 | 1304 __ jump_cc(Assembler::aboveEqual, nooverlap); |
1305 | |
1306 __ jmpb(L_copy_8_bytes); | |
1307 | |
1365 | 1308 __ align(OptoLoopAlignment); |
0 | 1309 __ BIND(L_copy_8_bytes_loop); |
1310 if (VM_Version::supports_mmx()) { | |
405 | 1311 if (UseXMMForArrayCopy) { |
1312 __ movq(xmm0, Address(from, count, Address::times_8)); | |
1313 __ movq(Address(to, count, Address::times_8), xmm0); | |
1314 } else { | |
1315 __ movq(mmx0, Address(from, count, Address::times_8)); | |
1316 __ movq(Address(to, count, Address::times_8), mmx0); | |
1317 } | |
0 | 1318 } else { |
1319 __ fild_d(Address(from, count, Address::times_8)); | |
1320 __ fistp_d(Address(to, count, Address::times_8)); | |
1321 } | |
1322 __ BIND(L_copy_8_bytes); | |
1323 __ decrement(count); | |
1324 __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); | |
1325 | |
405 | 1326 if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) { |
0 | 1327 __ emms(); |
1328 } | |
1329 inc_copy_counter_np(T_LONG); | |
1330 __ leave(); // required for proper stackwalking of RuntimeStub frame | |
304 | 1331 __ xorptr(rax, rax); // return 0 |
0 | 1332 __ ret(0); |
1333 return start; | |
1334 } | |
1335 | |
1336 | |
1337 // Helper for generating a dynamic type check. | |
1338 // The sub_klass must be one of {rbx, rdx, rsi}. | |
1339 // The temp is killed. | |
1340 void generate_type_check(Register sub_klass, | |
1341 Address& super_check_offset_addr, | |
1342 Address& super_klass_addr, | |
1343 Register temp, | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1344 Label* L_success, Label* L_failure) { |
0 | 1345 BLOCK_COMMENT("type_check:"); |
1346 | |
1347 Label L_fallthrough; | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1348 #define LOCAL_JCC(assembler_con, label_ptr) \ |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1349 if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \ |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1350 else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ |
0 | 1351 |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1352 // The following is a strange variation of the fast path which requires |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1353 // one less register, because needed values are on the argument stack. |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1354 // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1355 // L_success, L_failure, NULL); |
0 | 1356 assert_different_registers(sub_klass, temp); |
1357 | |
4762
069ab3f976d3
7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
stefank
parents:
3960
diff
changeset
|
1358 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); |
0 | 1359 |
1360 // if the pointers are equal, we are done (e.g., String[] elements) | |
304 | 1361 __ cmpptr(sub_klass, super_klass_addr); |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1362 LOCAL_JCC(Assembler::equal, L_success); |
0 | 1363 |
1364 // check the supertype display: | |
304 | 1365 __ movl2ptr(temp, super_check_offset_addr); |
0 | 1366 Address super_check_addr(sub_klass, temp, Address::times_1, 0); |
304 | 1367 __ movptr(temp, super_check_addr); // load displayed supertype |
1368 __ cmpptr(temp, super_klass_addr); // test the super type | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1369 LOCAL_JCC(Assembler::equal, L_success); |
0 | 1370 |
1371 // if it was a primary super, we can just fail immediately | |
1372 __ cmpl(super_check_offset_addr, sc_offset); | |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1373 LOCAL_JCC(Assembler::notEqual, L_failure); |
0 | 1374 |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1375 // The repne_scan instruction uses fixed registers, which will get spilled. |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1376 // We happen to know this works best when super_klass is in rax. |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1377 Register super_klass = temp; |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1378 __ movptr(super_klass, super_klass_addr); |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1379 __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1380 L_success, L_failure); |
0 | 1381 |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1382 __ bind(L_fallthrough); |
0 | 1383 |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1384 if (L_success == NULL) { BLOCK_COMMENT("L_success:"); } |
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1385 if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); } |
0 | 1386 |
644
c517646eef23
6813212: factor duplicated assembly code for general subclass check (for 6655638)
jrose
parents:
533
diff
changeset
|
1387 #undef LOCAL_JCC |
0 | 1388 } |
1389 | |
 1390 // | |
 1391 // Generate checkcasting array copy stub | |
 1392 // | |
 // Emits a stub that copies oops one element at a time from the source to
 // the destination. Each non-null element has its klass checked against
 // ckoff/ckval via generate_type_check before it is stored; null elements
 // are stored without a check.
 //
 1393 // Input: | |
 1394 // 4(rsp) - source array address | |
 1395 // 8(rsp) - destination array address | |
 1396 // 12(rsp) - element count, can be zero | |
 1397 // 16(rsp) - size_t ckoff (super_check_offset) | |
 1398 // 20(rsp) - oop ckval (super_klass) | |
 1399 // | |
 1400 // Output: | |
 1401 // rax, == 0 - success | |
 1402 // rax, == -1^K - failure, where K is partial transfer count | |
 // (the stub produces this value by applying notptr to K, i.e. ~K)
 1403 // | |
 // Parameters of the generator:
 // name - stub name handed to StubCodeMark
 // entry - if non-NULL, receives the pc located just after the
 // from/to/length arguments have been loaded into registers
 // dest_uninitialized - forwarded to gen_write_ref_array_pre_barrier
 // Returns the address of the first instruction of the generated stub.
 2324 | 1404 address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) { |
 0 | 1405 __ align(CodeEntryAlignment); |
 1406 StubCodeMark mark(this, "StubRoutines", name); | |
 1407 address start = __ pc(); | |
 1408 | |
 1409 Label L_load_element, L_store_element, L_do_card_marks, L_done; | |
 1410 | |
 1411 // register use: | |
 1412 // rax, rdx, rcx -- loop control (end_from, end_to, count) | |
 1413 // rdi, rsi -- element access (oop, klass) | |
 1414 // rbx, -- temp | |
 1415 const Register from = rax; // source array address | |
 1416 const Register to = rdx; // destination array address | |
 1417 const Register length = rcx; // elements count | |
 1418 const Register elem = rdi; // each oop copied | |
 1419 const Register elem_klass = rsi; // each elem._klass (sub_klass) | |
 1420 const Register temp = rbx; // lone remaining temp | |
 1421 | |
 1422 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
 1423 | |
 // Save the registers used for element access and as temp; they are
 // popped again at L_done.
 304 | 1424 __ push(rsi); |
 1425 __ push(rdi); | |
 1426 __ push(rbx); | |
 0 | 1427 |
 // Argument slots relative to the current rsp: the incoming N(rsp) slot is
 // now addressed as 16+N (three 4-byte pushes above plus, presumably, the
 // frame pointer saved by enter() -- confirm against MacroAssembler::enter).
 1428 Address from_arg(rsp, 16+ 4); // from | |
 1429 Address to_arg(rsp, 16+ 8); // to | |
 1430 Address length_arg(rsp, 16+12); // elements count | |
 1431 Address ckoff_arg(rsp, 16+16); // super_check_offset | |
 1432 Address ckval_arg(rsp, 16+20); // super_klass | |
 1433 | |
 1434 // Load up: | |
 304 | 1435 __ movptr(from, from_arg); |
 1436 __ movptr(to, to_arg); | |
 1437 __ movl2ptr(length, length_arg); | |
 0 | 1438 |
 2313
 d89a22843c62
 7020521: arraycopy stubs place prebarriers incorrectly
 iveresov
 parents:
 2245
 diff
 changeset
 |
 1439 if (entry != NULL) { |
 d89a22843c62
 7020521: arraycopy stubs place prebarriers incorrectly
 iveresov
 parents:
 2245
 diff
 changeset
 |
 1440 *entry = __ pc(); // Entry point from generic arraycopy stub. |
 d89a22843c62
 7020521: arraycopy stubs place prebarriers incorrectly
 iveresov
 parents:
 2245
 diff
 changeset
 |
 1441 BLOCK_COMMENT("Entry:"); |
 d89a22843c62
 7020521: arraycopy stubs place prebarriers incorrectly
 iveresov
 parents:
 2245
 diff
 changeset
 |
 1442 } |
 0 | 1443 |
 1444 //--------------------------------------------------------------- | |
 1445 // Assembler stub will be used for this call to arraycopy | |
 1446 // if the two arrays are subtypes of Object[] but the | |
 1447 // destination array type is not equal to or a supertype | |
 1448 // of the source type. Each element must be separately | |
 1449 // checked. | |
 1450 | |
 1451 // Loop-invariant addresses. They are exclusive end pointers. | |
 304 | 1452 Address end_from_addr(from, length, Address::times_ptr, 0); |
 1453 Address end_to_addr(to, length, Address::times_ptr, 0); | |
 0 | 1454 |
 // The aliases below name the same physical registers as from/to/length;
 // after the lea/negptr sequence further down they hold the end pointers
 // and the negated count.
 1455 Register end_from = from; // re-use | |
 1456 Register end_to = to; // re-use | |
 1457 Register count = length; // re-use | |
 1458 | |
 1459 // Loop-variant addresses. They assume post-incremented count < 0. | |
 304 | 1460 Address from_element_addr(end_from, count, Address::times_ptr, 0); |
 1461 Address to_element_addr(end_to, count, Address::times_ptr, 0); | |
 0 | 1462 Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); |
 1463 | |
 1464 // Copy from low to high addresses, indexed from the end of each array. | |
 // The pre-barrier is emitted while 'to' and 'count' still hold the
 // original destination address and (positive) element count.
 2324 | 1465 gen_write_ref_array_pre_barrier(to, count, dest_uninitialized); |
 304 | 1466 __ lea(end_from, end_from_addr); |
 1467 __ lea(end_to, end_to_addr); | |
 0 | 1468 assert(length == count, ""); // else fix next line: |
 304 | 1469 __ negptr(count); // negate and test the length |
 0 | 1470 __ jccb(Assembler::notZero, L_load_element); |
 1471 | |
 1472 // Empty array: Nothing to do. | |
 304 | 1473 __ xorptr(rax, rax); // return 0 on (trivial) success |
 0 | 1474 __ jmp(L_done); |
 1475 | |
 1476 // ======== begin loop ======== | |
 1477 // (Loop is rotated; its entry is L_load_element.) | |
 1478 // Loop control: | |
 1479 // for (count = -count; count != 0; count++) | |
 1480 // Base pointers src, dst are biased by count elements (times_ptr scaling), | |
 // so they point just past the last element of each array.
 1365 | 1481 __ align(OptoLoopAlignment); |
 0 | 1482 |
 1483 __ BIND(L_store_element); | |
 304 | 1484 __ movptr(to_element_addr, elem); // store the oop |
 0 | 1485 __ increment(count); // increment the count toward zero |
 1486 __ jccb(Assembler::zero, L_do_card_marks); | |
 1487 | |
 1488 // ======== loop entry is here ======== | |
 1489 __ BIND(L_load_element); | |
 304 | 1490 __ movptr(elem, from_element_addr); // load the oop |
 1491 __ testptr(elem, elem); | |
 // A null element needs no type check: go straight to the store.
 0 | 1492 __ jccb(Assembler::zero, L_store_element); |
 1493 | |
 1494 // (Could do a trick here: Remember last successful non-null | |
 1495 // element stored and make a quick oop equality check on it.) | |
 1496 | |
 304 | 1497 __ movptr(elem_klass, elem_klass_addr); // query the object klass |
 // Success label is L_store_element; the failure label is NULL, so a failed
 // check falls through to the error handling below.
 0 | 1498 generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp, |
 1499 &L_store_element, NULL); | |
 10324
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1500 // (On fall-through, we have failed the element type check.) |
 0 | 1501 // ======== end loop ======== |
 1502 | |
 1503 // It was a real error; we must depend on the caller to finish the job. | |
 19
 a73cc31728fe
 6614036: REGRESSION: Java server x86 VM intermittently crash with SIGSEGV (0xb)
 rasbold
 parents:
 16
 diff
 changeset
 |
 1504 // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops. |
 a73cc31728fe
 6614036: REGRESSION: Java server x86 VM intermittently crash with SIGSEGV (0xb)
 rasbold
 parents:
 16
 diff
 changeset
 |
 1505 // Emit GC store barriers for the oops we have copied (length_arg + count), |
 0 | 1506 // and report their number to the caller. |
 10324
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1507 assert_different_registers(to, count, rax); |
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1508 Label L_post_barrier; |
 // NOTE: the conditional branch below consumes the flags produced by this
 // addl; the two instructions in between are relied on to leave the flags
 // untouched (see the remark on notptr).
 0 | 1509 __ addl(count, length_arg); // transfers = (length - remaining) |
 304 | 1510 __ movl2ptr(rax, count); // save the value |
 10324
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1511 __ notptr(rax); // report (-1^K) to caller (does not affect flags) |
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1512 __ jccb(Assembler::notZero, L_post_barrier); |
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1513 __ jmp(L_done); // K == 0, nothing was copied, skip post barrier |
 0 | 1514 |
 1515 // Come here on success only. | |
 1516 __ BIND(L_do_card_marks); | |
 10324
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1517 __ xorptr(rax, rax); // return 0 on success |
 // count reached zero in the loop; reload the full length for the barrier.
 304 | 1518 __ movl2ptr(count, length_arg); |
 10324
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1519 |
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 1520 __ BIND(L_post_barrier); |
 3f281b313240
 8010927: Kitchensink crashed with SIGSEGV, Problematic frame: v ~StubRoutines::checkcast_arraycopy
 kvn
 parents:
 8873
 diff
 changeset
 |
 // 'to' was turned into the end pointer above, so fetch the original
 // destination address from the stack again.
 1521 __ movptr(to, to_arg); // reload |
 0 | 1522 gen_write_ref_array_post_barrier(to, count); |
 1523 | |
 1524 // Common exit point (success or failure). | |
 1525 __ BIND(L_done); | |
 304 | 1526 __ pop(rbx); |
 1527 __ pop(rdi); | |
 1528 __ pop(rsi); | |
 0 | 1529 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); |
 1530 __ leave(); // required for proper stackwalking of RuntimeStub frame | |
 1531 __ ret(0); | |
 1532 | |
 1533 return start; | |
 1534 } | |
1535 | |
 1536 // | |
 1537 // Generate 'unsafe' array copy stub | |
 1538 // Though just as safe as the other stubs, it takes an unscaled | |
 1539 // size_t argument instead of an element count. | |
 1540 // | |
 1541 // Input: | |
 1542 // 4(rsp) - source array address | |
 1543 // 8(rsp) - destination array address | |
 1544 // 12(rsp) - byte count, can be zero | |
 1545 // | |
 1546 // Output: | |
 1547 // rax, == 0 - success | |
 1548 // rax, == -1 - need to call System.arraycopy | |
 // (this stub never writes rax itself; the value is presumably produced
 // by the copy stub that is jumped to -- confirm against those stubs)
 1549 // | |
 1550 // Examines the alignment of the operands and dispatches | |
 1551 // to a long, int, short, or byte copy loop. | |
 1552 // | |
 // Parameters of the generator:
 // name - stub name handed to StubCodeMark
 // byte_copy_entry - jump target when the operands are not 2-byte aligned
 // short_copy_entry - jump target when they are 2-byte but not 4-byte aligned
 // int_copy_entry - jump target when they are 4-byte but not 8-byte aligned
 // long_copy_entry - jump target when they are 8-byte aligned
 // Returns the address of the first instruction of the generated stub.
 1553 address generate_unsafe_copy(const char *name, | |
 1554 address byte_copy_entry, | |
 1555 address short_copy_entry, | |
 1556 address int_copy_entry, | |
 1557 address long_copy_entry) { | |
 1558 | |
 1559 Label L_long_aligned, L_int_aligned, L_short_aligned; | |
 1560 | |
 1561 __ align(CodeEntryAlignment); | |
 1562 StubCodeMark mark(this, "StubRoutines", name); | |
 1563 address start = __ pc(); | |
 1564 | |
 1565 const Register from = rax; // source array address | |
 1566 const Register to = rdx; // destination array address | |
 1567 const Register count = rcx; // byte count on entry; rescaled to an element count before dispatch | |
 1568 | |
 1569 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
 304 | 1570 __ push(rsi); |
 1571 __ push(rdi); | |
 // The incoming N(rsp) slots are addressed as 12+N from here on, after the
 // two pushes above and the enter().
 0 | 1572 Address from_arg(rsp, 12+ 4); // from |
 1573 Address to_arg(rsp, 12+ 8); // to | |
 1574 Address count_arg(rsp, 12+12); // byte count | |
 1575 | |
 1576 // Load up: | |
 304 | 1577 __ movptr(from , from_arg); |
 1578 __ movptr(to , to_arg); | |
 1579 __ movl2ptr(count, count_arg); | |
 0 | 1580 |
 1581 // bump this on entry, not on exit: | |
 1582 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); | |
 1583 | |
 // bits = from | to | count: a low bit is clear in 'bits' only if it is
 // clear in both addresses and in the byte count, so one test per size
 // decides whether all three are aligned to that size.
 1584 const Register bits = rsi; | |
 304 | 1585 __ mov(bits, from); |
 1586 __ orptr(bits, to); | |
 1587 __ orptr(bits, count); | |
 0 | 1588 |
 1589 __ testl(bits, BytesPerLong-1); | |
 1590 __ jccb(Assembler::zero, L_long_aligned); | |
 1591 | |
 1592 __ testl(bits, BytesPerInt-1); | |
 1593 __ jccb(Assembler::zero, L_int_aligned); | |
 1594 | |
 // Not even short-aligned: hand off to the byte copy. The count slot on the
 // stack is left as the byte count, and rsi/rdi are still pushed.
 1595 __ testl(bits, BytesPerShort-1); | |
 1596 __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); | |
 1597 | |
 // L_short_aligned is bound here but only ever reached by fall-through;
 // nothing in this stub branches to it.
 1598 __ BIND(L_short_aligned); | |
 304 | 1599 __ shrptr(count, LogBytesPerShort); // size => short_count |
 0 | 1600 __ movl(count_arg, count); // update 'count' |
 1601 __ jump(RuntimeAddress(short_copy_entry)); | |
 1602 | |
 1603 __ BIND(L_int_aligned); | |
 304 | 1604 __ shrptr(count, LogBytesPerInt); // size => int_count |
 0 | 1605 __ movl(count_arg, count); // update 'count' |
 1606 __ jump(RuntimeAddress(int_copy_entry)); | |
 1607 | |
 1608 __ BIND(L_long_aligned); | |
 304 | 1609 __ shrptr(count, LogBytesPerLong); // size => qword_count |
 // The scaled count is written back while rsi/rdi are still on the stack,
 // i.e. while the 12+N offsets are still valid; the pops follow.
 0 | 1610 __ movl(count_arg, count); // update 'count' |
 304 | 1611 __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. |
 1612 __ pop(rsi); | |
 0 | 1613 __ jump(RuntimeAddress(long_copy_entry)); |
 1614 | |
 1615 return start; | |
 1616 } | |
1617 | |
1618 | |
 1619 // Perform range checks on the proposed arraycopy. | |
 1620 // Smashes src_pos and dst_pos. (Uses them up for temps.) | |
 //
 // Emits code that adds the 32-bit value at 'length' to src_pos and to
 // dst_pos, then branches to L_failed if either sum is above (unsigned
 // compare) the length field of the corresponding array.
 //
 // src, dst - registers holding the source / destination array oops
 // src_pos, dst_pos - registers holding the start indices; on exit they
 // hold src_pos + length and dst_pos + length
 // length - memory operand holding the element count
 // L_failed - label branched to when either check fails
 1621 void arraycopy_range_checks(Register src, | |
 1622 Register src_pos, | |
 1623 Register dst, | |
 1624 Register dst_pos, | |
 1625 Address& length, | |
 1626 Label& L_failed) { | |
 1627 BLOCK_COMMENT("arraycopy_range_checks:"); | |
 // The "end" registers are aliases of the position registers, which is why
 // the caller's src_pos / dst_pos values are destroyed.
 1628 const Register src_end = src_pos; // source array end position | |
 1629 const Register dst_end = dst_pos; // destination array end position | |
 1630 __ addl(src_end, length); // src_pos + length | |
 1631 __ addl(dst_end, length); // dst_pos + length | |
 1632 | |
 1633 // if (src_pos + length > arrayOop(src)->length() ) FAIL; | |
 1634 __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes())); | |
 1635 __ jcc(Assembler::above, L_failed); // unsigned comparison | |
 1636 | |
 1637 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL; | |
 1638 __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes())); | |
 1639 __ jcc(Assembler::above, L_failed); // unsigned comparison | |
 1640 | |
 1641 BLOCK_COMMENT("arraycopy_range_checks done"); | |
 1642 } | |
1643 | |
1644 | |
 1645 // | |
 1646 // Generate generic array copy stubs | |
 1647 // | |
 // Emits a stub that validates the (src, src_pos, dst, dst_pos, length)
 // arguments, works out the array kind from the source klass layout helper,
 // rewrites the stack arguments into the form the chosen copy stub expects
 // and then jumps to that stub. When a check fails it returns -1 in rax.
 //
 1648 // Input: | |
 1649 // 4(rsp) - src oop | |
 1650 // 8(rsp) - src_pos | |
 1651 // 12(rsp) - dst oop | |
 1652 // 16(rsp) - dst_pos | |
 1653 // 20(rsp) - element count | |
 1654 // | |
 1655 // Output: | |
 1656 // rax, == 0 - success | |
 1657 // rax, == -1^K - failure, where K is partial transfer count | |
 1658 // | |
 // Parameters of the generator:
 // name - stub name handed to StubCodeMark
 // entry_jbyte_arraycopy - jump target for primitive arrays, log2 elsize 0
 // entry_jshort_arraycopy - jump target for log2 elsize LogBytesPerShort
 // entry_jint_arraycopy - jump target for log2 elsize LogBytesPerInt
 // entry_oop_arraycopy - jump target for object arrays that need no
 // per-element type check
 // entry_jlong_arraycopy - jump target for the remaining primitive case
 // entry_checkcast_arraycopy - jump target for object arrays whose array
 // type check failed (per-element checks needed)
 // Returns the address of the stub entry ('start'), which lies after the
 // L_failed_0 trampoline emitted first.
 1659 address generate_generic_copy(const char *name, | |
 1660 address entry_jbyte_arraycopy, | |
 1661 address entry_jshort_arraycopy, | |
 1662 address entry_jint_arraycopy, | |
 1663 address entry_oop_arraycopy, | |
 1664 address entry_jlong_arraycopy, | |
 1665 address entry_checkcast_arraycopy) { | |
 1666 Label L_failed, L_failed_0, L_objArray; | |
 1667 | |
 // Pad with nops so that the 5-byte jmp emitted at L_failed_0 below ends
 // exactly on a CodeEntryAlignment boundary; the assert after the jmp
 // checks this.
 1668 { int modulus = CodeEntryAlignment; | |
 1669 int target = modulus - 5; // 5 = sizeof jmp(L_failed) | |
 1670 int advance = target - (__ offset() % modulus); | |
 1671 if (advance < 0) advance += modulus; | |
 1672 if (advance > 0) __ nop(advance); | |
 1673 } | |
 1674 StubCodeMark mark(this, "StubRoutines", name); | |
 1675 | |
 1676 // Short-hop target to L_failed. Makes for denser prologue code. | |
 1677 __ BIND(L_failed_0); | |
 1678 __ jmp(L_failed); | |
 1679 assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); | |
 1680 | |
 1681 __ align(CodeEntryAlignment); | |
 1682 address start = __ pc(); | |
 1683 | |
 1684 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
 304 | 1685 __ push(rsi); |
 1686 __ push(rdi); | |
 0 | 1687 |
 1688 // bump this on entry, not on exit: | |
 1689 inc_counter_np(SharedRuntime::_generic_array_copy_ctr); | |
 1690 | |
 1691 // Input values | |
 // (incoming N(rsp) slots, addressed as 12+N after the enter() and the two
 // pushes above)
 1692 Address SRC (rsp, 12+ 4); | |
 1693 Address SRC_POS (rsp, 12+ 8); | |
 1694 Address DST (rsp, 12+12); | |
 1695 Address DST_POS (rsp, 12+16); | |
 1696 Address LENGTH (rsp, 12+20); | |
 1697 | |
 1698 //----------------------------------------------------------------------- | |
 1699 // Assembler stub will be used for this call to arraycopy | |
 1700 // if the following conditions are met: | |
 1701 // | |
 1702 // (1) src and dst must not be null. | |
 1703 // (2) src_pos must not be negative. | |
 1704 // (3) dst_pos must not be negative. | |
 1705 // (4) length must not be negative. | |
 1706 // (5) src klass and dst klass should be the same and not NULL. | |
 1707 // (6) src and dst should be arrays. | |
 1708 // (7) src_pos + length must not exceed length of src. | |
 1709 // (8) dst_pos + length must not exceed length of dst. | |
 1710 // | |
 1711 | |
 1712 const Register src = rax; // source array oop | |
 1713 const Register src_pos = rsi; | |
 1714 const Register dst = rdx; // destination array oop | |
 1715 const Register dst_pos = rdi; | |
 1716 const Register length = rcx; // transfer count | |
 1717 | |
 1718 // if (src == NULL) return -1; | |
 304 | 1719 __ movptr(src, SRC); // src oop |
 1720 __ testptr(src, src); | |
 0 | 1721 __ jccb(Assembler::zero, L_failed_0); |
 1722 | |
 1723 // if (src_pos < 0) return -1; | |
 304 | 1724 __ movl2ptr(src_pos, SRC_POS); // src_pos |
 0 | 1725 __ testl(src_pos, src_pos); |
 1726 __ jccb(Assembler::negative, L_failed_0); | |
 1727 | |
 1728 // if (dst == NULL) return -1; | |
 304 | 1729 __ movptr(dst, DST); // dst oop |
 1730 __ testptr(dst, dst); | |
 0 | 1731 __ jccb(Assembler::zero, L_failed_0); |
 1732 | |
 1733 // if (dst_pos < 0) return -1; | |
 304 | 1734 __ movl2ptr(dst_pos, DST_POS); // dst_pos |
 0 | 1735 __ testl(dst_pos, dst_pos); |
 1736 __ jccb(Assembler::negative, L_failed_0); | |
 1737 | |
 1738 // if (length < 0) return -1; | |
 304 | 1739 __ movl2ptr(length, LENGTH); // length |
 0 | 1740 __ testl(length, length); |
 1741 __ jccb(Assembler::negative, L_failed_0); | |
 1742 | |
 1743 // Load src->klass(). No run-time NULL check is emitted for it: a NULL | |
 // klass is only caught by the ASSERT-only block below, which stops the VM
 // rather than returning -1. Note that rcx is reused here, so the length
 // loaded above is overwritten and is re-read from LENGTH later.
 1744 Address src_klass_addr(src, oopDesc::klass_offset_in_bytes()); | |
 1745 Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes()); | |
 1746 const Register rcx_src_klass = rcx; // array klass | |
 304 | 1747 __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes())); |
 0 | 1748 |
 1749 #ifdef ASSERT | |
 1750 // assert(src->klass() != NULL); | |
 1751 BLOCK_COMMENT("assert klasses not null"); | |
 1752 { Label L1, L2; | |
 304 | 1753 __ testptr(rcx_src_klass, rcx_src_klass); |
 0 | 1754 __ jccb(Assembler::notZero, L2); // it is broken if klass is NULL |
 1755 __ bind(L1); | |
 1756 __ stop("broken null klass"); | |
 1757 __ bind(L2); | |
 304 | 1758 __ cmpptr(dst_klass_addr, (int32_t)NULL_WORD); |
 0 | 1759 __ jccb(Assembler::equal, L1); // this would be broken also |
 1760 BLOCK_COMMENT("assert done"); | |
 1761 } | |
 1762 #endif //ASSERT | |
 1763 | |
 1764 // Load layout helper (32-bits) | |
 1765 // | |
 1766 // |array_tag| | header_size | element_type | |log2_element_size| | |
 1767 // 32 30 24 16 8 2 0 | |
 1768 // | |
 1769 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 | |
 1770 // | |
 1771 | |
 4762
 069ab3f976d3
 7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
 stefank
 parents:
 3960
 diff
 changeset
 |
 1772 int lh_offset = in_bytes(Klass::layout_helper_offset()); |
 0 | 1773 Address src_klass_lh_addr(rcx_src_klass, lh_offset); |
 1774 | |
 1775 // Handle objArrays completely differently... | |
 1776 jint objArray_lh = Klass::array_layout_helper(T_OBJECT); | |
 1777 __ cmpl(src_klass_lh_addr, objArray_lh); | |
 1778 __ jcc(Assembler::equal, L_objArray); | |
 1779 | |
 1780 // if (src->klass() != dst->klass()) return -1; | |
 304 | 1781 __ cmpptr(rcx_src_klass, dst_klass_addr); |
 0 | 1782 __ jccb(Assembler::notEqual, L_failed_0); |
 1783 | |
 // From here on rcx holds the layout helper instead of the klass pointer.
 1784 const Register rcx_lh = rcx; // layout helper | |
 1785 assert(rcx_lh == rcx_src_klass, "known alias"); | |
 1786 __ movl(rcx_lh, src_klass_lh_addr); | |
 1787 | |
 1788 // if (!src->is_Array()) return -1; | |
 1789 __ cmpl(rcx_lh, Klass::_lh_neutral_value); | |
 1790 __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp | |
 1791 | |
 1792 // At this point, it is known to be a typeArray (array_tag 0x3). | |
 1793 #ifdef ASSERT | |
 1794 { Label L; | |
 1795 __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); | |
 1796 __ jcc(Assembler::greaterEqual, L); // signed cmp | |
 1797 __ stop("must be a primitive array"); | |
 1798 __ bind(L); | |
 1799 } | |
 1800 #endif | |
 1801 | |
 1802 assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); | |
 1803 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); | |
 1804 | |
 6831
 d8ce2825b193
 8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
 coleenp
 parents:
 6725
 diff
 changeset
 |
 1805 // TypeArrayKlass |
 0 | 1806 // |
 1807 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); | |
 1808 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); | |
 1809 // | |
 // rsi/rdi are free to be reused here: the range checks above have already
 // consumed src_pos and dst_pos, and both are reloaded from the stack below.
 1810 const Register rsi_offset = rsi; // array offset | |
 1811 const Register src_array = src; // src array offset | |
 1812 const Register dst_array = dst; // dst array offset | |
 1813 const Register rdi_elsize = rdi; // log2 element size | |
 1814 | |
 304 | 1815 __ mov(rsi_offset, rcx_lh); |
 1816 __ shrptr(rsi_offset, Klass::_lh_header_size_shift); | |
 1817 __ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset | |
 1818 __ addptr(src_array, rsi_offset); // src array offset | |
 1819 __ addptr(dst_array, rsi_offset); // dst array offset | |
 1820 __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize | |
 0 | 1821 |
 1822 // next registers should be set before the jump to corresponding stub | |
 1823 const Register from = src; // source array address | |
 1824 const Register to = dst; // destination array address | |
 1825 const Register count = rcx; // elements count | |
 1826 // some of them should be duplicated on stack | |
 // NOTE(review): these macros are not #undef'd anywhere in this function,
 // so they remain defined for whatever follows it in the file.
 1827 #define FROM Address(rsp, 12+ 4) | |
 1828 #define TO Address(rsp, 12+ 8) // Written only on the oop (L_plain_copy) path | |
 1829 #define COUNT Address(rsp, 12+12) // Only for oop arraycopy | |
 1830 | |
 1831 BLOCK_COMMENT("scale indexes to element size"); | |
 304 | 1832 __ movl2ptr(rsi, SRC_POS); // src_pos |
 1833 __ shlptr(rsi); // src_pos << rcx (log2 elsize) | |
 0 | 1834 assert(src_array == from, ""); |
 304 | 1835 __ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize |
 1836 __ movl2ptr(rdi, DST_POS); // dst_pos | |
 1837 __ shlptr(rdi); // dst_pos << rcx (log2 elsize) | |
 0 | 1838 assert(dst_array == to, ""); |
 304 | 1839 __ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize |
 1840 __ movptr(FROM, from); // src_addr | |
 // The shifts above needed log2 elsize in rcx; move it out of the way
 // before rcx is reloaded with the element count.
 1841 __ mov(rdi_elsize, rcx_lh); // log2 elsize | |
 1842 __ movl2ptr(count, LENGTH); // elements count | |
 0 | 1843 |
 1844 BLOCK_COMMENT("choose copy loop based on element size"); | |
 1845 __ cmpl(rdi_elsize, 0); | |
 1846 | |
 1847 __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy)); | |
 1848 __ cmpl(rdi_elsize, LogBytesPerShort); | |
 1849 __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy)); | |
 1850 __ cmpl(rdi_elsize, LogBytesPerInt); | |
 1851 __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy)); | |
 // Debug builds only: any size left over at this point must be the long
 // size; product builds jump to the long copy without checking.
 1852 #ifdef ASSERT | |
 1853 __ cmpl(rdi_elsize, LogBytesPerLong); | |
 1854 __ jccb(Assembler::notEqual, L_failed); | |
 1855 #endif | |
 304 | 1856 __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. |
 1857 __ pop(rsi); | |
 0 | 1858 __ jump(RuntimeAddress(entry_jlong_arraycopy)); |
 1859 | |
 // Failure exit: reached while exactly rsi and rdi are pushed.
 1860 __ BIND(L_failed); | |
 304 | 1861 __ xorptr(rax, rax); |
 1862 __ notptr(rax); // return -1 | |
 1863 __ pop(rdi); | |
 1864 __ pop(rsi); | |
 0 | 1865 __ leave(); // required for proper stackwalking of RuntimeStub frame |
 1866 __ ret(0); | |
 1867 | |
 6831
 d8ce2825b193
 8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
 coleenp
 parents:
 6725
 diff
 changeset
 |
 1868 // ObjArrayKlass |
 0 | 1869 __ BIND(L_objArray); |
 1870 // live at this point: rcx_src_klass, src[_pos], dst[_pos] | |
 1871 | |
 1872 Label L_plain_copy, L_checkcast_copy; | |
 1873 // test array classes for subtyping | |
 304 | 1874 __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality |
 0 | 1875 __ jccb(Assembler::notEqual, L_checkcast_copy); |
 1876 | |
 1877 // Identically typed arrays can be copied without element-wise checks. | |
 1878 assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass); | |
 1879 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); | |
 1880 | |
 // Also reached from the checkcast path once the array type check passed.
 // The positions were destroyed by the range checks, hence the reloads.
 1881 __ BIND(L_plain_copy); | |
 304 | 1882 __ movl2ptr(count, LENGTH); // elements count |
 1883 __ movl2ptr(src_pos, SRC_POS); // reload src_pos | |
 1884 __ lea(from, Address(src, src_pos, Address::times_ptr, | |
 1885 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr | |
 1886 __ movl2ptr(dst_pos, DST_POS); // reload dst_pos | |
 1887 __ lea(to, Address(dst, dst_pos, Address::times_ptr, | |
 1888 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr | |
 // Overwrite the first three incoming argument slots with
 // (from, to, count) for the stub jumped to below.
 1889 __ movptr(FROM, from); // src_addr | |
 1890 __ movptr(TO, to); // dst_addr | |
 0 | 1891 __ movl(COUNT, count); // count |
 1892 __ jump(RuntimeAddress(entry_oop_arraycopy)); | |
 1893 | |
 1894 __ BIND(L_checkcast_copy); | |
 1895 // live at this point: rcx_src_klass, dst[_pos], src[_pos] | |
 1896 { | |
 1897 // Handy offsets: | |
 6831
 d8ce2825b193
 8000213: NPG: Should have renamed arrayKlass and typeArrayKlass
 coleenp
 parents:
 6725
 diff
 changeset
 |
 1898 int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); |
 4762
 069ab3f976d3
 7118863: Move sizeof(klassOopDesc) into the *Klass::*_offset_in_bytes() functions
 stefank
 parents:
 3960
 diff
 changeset
 |
 1899 int sco_offset = in_bytes(Klass::super_check_offset_offset()); |
 0 | 1900 |
 1901 Register rsi_dst_klass = rsi; | |
 1902 Register rdi_temp = rdi; | |
 1903 assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); | |
 1904 assert(rdi_temp == dst_pos, "expected alias w/ dst_pos"); | |
 1905 Address dst_klass_lh_addr(rsi_dst_klass, lh_offset); | |
 1906 | |
 1907 // Before looking at dst.length, make sure dst is also an objArray. | |
 304 | 1908 __ movptr(rsi_dst_klass, dst_klass_addr); |
 0 | 1909 __ cmpl(dst_klass_lh_addr, objArray_lh); |
 1910 __ jccb(Assembler::notEqual, L_failed); | |
 1911 | |
 1912 // It is safe to examine both src.length and dst.length. | |
 304 | 1913 __ movl2ptr(src_pos, SRC_POS); // reload rsi |
 0 | 1914 arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); |
 1915 // (Now src_pos and dst_pos are killed, but not src and dst.) | |
 1916 | |
 1917 // We'll need this temp (don't forget to pop it after the type check). | |
 // From this push until the matching pop, the incoming argument slots sit
 // 4 bytes further from rsp; L_failed must not be targeted in between.
 304 | 1918 __ push(rbx); |
 0 | 1919 Register rbx_src_klass = rbx; |
 1920 | |
 304 | 1921 __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx |
 1922 __ movptr(rsi_dst_klass, dst_klass_addr); | |
 0 | 1923 Address super_check_offset_addr(rsi_dst_klass, sco_offset); |
 1924 Label L_fail_array_check; | |
 // Success label is NULL (fall through on success); failure goes to
 // L_fail_array_check.
 1925 generate_type_check(rbx_src_klass, | |
 1926 super_check_offset_addr, dst_klass_addr, | |
 1927 rdi_temp, NULL, &L_fail_array_check); | |
 1928 // (On fall-through, we have passed the array type check.) | |
 304 | 1929 __ pop(rbx); |
 0 | 1930 __ jmp(L_plain_copy); |
 1931 | |
 1932 __ BIND(L_fail_array_check); | |
 1933 // Reshuffle arguments so we can call checkcast_arraycopy: | |
 1934 | |
 1935 // match initial saves for checkcast_arraycopy | |
 304 | 1936 // push(rsi); // already done; see above |
 1937 // push(rdi); // already done; see above | |
 1938 // push(rbx); // already done; see above | |
 0 | 1939 |
 1940 // Marshal outgoing arguments now, freeing registers. | |
 // The *_arg addresses describe the outgoing layout expected by the
 // checkcast stub; the SRC_POS_arg/DST_POS_arg/LENGTH_arg addresses name
 // the still-unmodified incoming values in the same frame. Several of them
 // share a slot (for example to_arg and SRC_POS_arg), so every incoming
 // value is loaded into a register before its slot is overwritten.
 1941 Address from_arg(rsp, 16+ 4); // from | |
 1942 Address to_arg(rsp, 16+ 8); // to | |
 1943 Address length_arg(rsp, 16+12); // elements count | |
 1944 Address ckoff_arg(rsp, 16+16); // super_check_offset | |
 1945 Address ckval_arg(rsp, 16+20); // super_klass | |
 1946 | |
 1947 Address SRC_POS_arg(rsp, 16+ 8); | |
 1948 Address DST_POS_arg(rsp, 16+16); | |
 1949 Address LENGTH_arg(rsp, 16+20); | |
 1950 // push rbx, changed the incoming offsets (why not just use rbp,??) | |
 1951 // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, ""); | |
 1952 | |
 // rbx = element klass of the destination array klass (held in rsi).
 304 | 1953 __ movptr(rbx, Address(rsi_dst_klass, ek_offset)); |
 1954 __ movl2ptr(length, LENGTH_arg); // reload elements count | |
 1955 __ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos | |
 1956 __ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos | |
 0 | 1957 |
 304 | 1958 __ movptr(ckval_arg, rbx); // destination element type |
 0 | 1959 __ movl(rbx, Address(rbx, sco_offset)); |
 1960 __ movl(ckoff_arg, rbx); // corresponding class check offset | |
 1961 | |
 1962 __ movl(length_arg, length); // outgoing length argument | |
 1963 | |
 304 | 1964 __ lea(from, Address(src, src_pos, Address::times_ptr, |
 0 | 1965 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
 304 | 1966 __ movptr(from_arg, from); |
 0 | 1967 |
 304 | 1968 __ lea(to, Address(dst, dst_pos, Address::times_ptr, |
 0 | 1969 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); |
 304 | 1970 __ movptr(to_arg, to); |
 // rsi, rdi and rbx are still pushed here, matching the saves the
 // checkcast stub performs before its own secondary entry point.
 0 | 1971 __ jump(RuntimeAddress(entry_checkcast_arraycopy)); |
 1972 } | |
 1973 | |
 1974 return start; | |
 1975 } | |
1976 | |
1977 void generate_arraycopy_stubs() { | |
1978 address entry; | |
1979 address entry_jbyte_arraycopy; | |
1980 address entry_jshort_arraycopy; | |
1981 address entry_jint_arraycopy; | |
1982 address entry_oop_arraycopy; | |
1983 address entry_jlong_arraycopy; | |
1984 address entry_checkcast_arraycopy; | |
1985 | |
1986 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = | |
1987 generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry, | |
1988 "arrayof_jbyte_disjoint_arraycopy"); | |
1989 StubRoutines::_arrayof_jbyte_arraycopy = | |
1990 generate_conjoint_copy(T_BYTE, true, Address::times_1, entry, | |
1991 NULL, "arrayof_jbyte_arraycopy"); | |
1992 StubRoutines::_jbyte_disjoint_arraycopy = | |
1993 generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry, | |
1994 "jbyte_disjoint_arraycopy"); | |
1995 StubRoutines::_jbyte_arraycopy = | |
1996 generate_conjoint_copy(T_BYTE, false, Address::times_1, entry, | |
1997 &entry_jbyte_arraycopy, "jbyte_arraycopy"); | |
1998 | |
1999 StubRoutines::_arrayof_jshort_disjoint_arraycopy = | |
2000 generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry, | |
2001 "arrayof_jshort_disjoint_arraycopy"); | |
2002 StubRoutines::_arrayof_jshort_arraycopy = | |
2003 generate_conjoint_copy(T_SHORT, true, Address::times_2, entry, | |
2004 NULL, "arrayof_jshort_arraycopy"); | |
2005 StubRoutines::_jshort_disjoint_arraycopy = | |
2006 generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry, | |
2007 "jshort_disjoint_arraycopy"); | |
2008 StubRoutines::_jshort_arraycopy = | |
2009 generate_conjoint_copy(T_SHORT, false, Address::times_2, entry, | |
2010 &entry_jshort_arraycopy, "jshort_arraycopy"); | |
2011 | |
2012 // Next arrays are always aligned on 4 bytes at least. | |
2013 StubRoutines::_jint_disjoint_arraycopy = | |
2014 generate_disjoint_copy(T_INT, true, Address::times_4, &entry, | |
2015 "jint_disjoint_arraycopy"); | |
2016 StubRoutines::_jint_arraycopy = | |
2017 generate_conjoint_copy(T_INT, true, Address::times_4, entry, | |
2018 &entry_jint_arraycopy, "jint_arraycopy"); | |
2019 | |
2020 StubRoutines::_oop_disjoint_arraycopy = | |
304 | 2021 generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, |
0 | 2022 "oop_disjoint_arraycopy"); |
2023 StubRoutines::_oop_arraycopy = | |
304 | 2024 generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, |
0 | 2025 &entry_oop_arraycopy, "oop_arraycopy"); |
2026 | |
2324 | 2027 StubRoutines::_oop_disjoint_arraycopy_uninit = |
2028 generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, | |
2029 "oop_disjoint_arraycopy_uninit", | |
2030 /*dest_uninitialized*/true); | |
2031 StubRoutines::_oop_arraycopy_uninit = | |
2032 generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, | |
2033 NULL, "oop_arraycopy_uninit", | |
2034 /*dest_uninitialized*/true); | |
2035 | |
0 | 2036 StubRoutines::_jlong_disjoint_arraycopy = |
2037 generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy"); | |
2038 StubRoutines::_jlong_arraycopy = | |
2039 generate_conjoint_long_copy(entry, &entry_jlong_arraycopy, | |
2040 "jlong_arraycopy"); | |
2041 | |
1763 | 2042 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); |
2043 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); | |
2044 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); | |
2045 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); | |
2046 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); | |
2047 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); | |
2048 | |
2324 | 2049 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; |
2050 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; | |
2051 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; | |
2052 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; | |
0 | 2053 |
2324 | 2054 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; |
2055 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; | |
2056 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; | |
2057 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; | |
0 | 2058 |
2059 StubRoutines::_checkcast_arraycopy = | |
2324 | 2060 generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); |
2061 StubRoutines::_checkcast_arraycopy_uninit = | |
2062 generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, /*dest_uninitialized*/true); | |
0 | 2063 |
2064 StubRoutines::_unsafe_arraycopy = | |
2065 generate_unsafe_copy("unsafe_arraycopy", | |
2066 entry_jbyte_arraycopy, | |
2067 entry_jshort_arraycopy, | |
2068 entry_jint_arraycopy, | |
2069 entry_jlong_arraycopy); | |
2070 | |
2071 StubRoutines::_generic_arraycopy = | |
2072 generate_generic_copy("generic_arraycopy", | |
2073 entry_jbyte_arraycopy, | |
2074 entry_jshort_arraycopy, | |
2075 entry_jint_arraycopy, | |
2076 entry_oop_arraycopy, | |
2077 entry_jlong_arraycopy, | |
2078 entry_checkcast_arraycopy); | |
2079 } | |
2080 | |
1174
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
// Generates the floating-point math stubs: log, log10, sin, cos, tan, exp and pow.
// Each stub records the current code position as the matching
// StubRoutines::_intrinsic_* entry, loads its double argument(s) from the stack
// with fld_d, applies the operation and returns with ret(0).
2081 void generate_math_stubs() { |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2082 { |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2083 StubCodeMark mark(this, "StubRoutines", "log"); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2084 StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2085 |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
// presumably rsp+4 is the double argument just above the return address - confirm
2086 __ fld_d(Address(rsp, 4)); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2087 __ flog(); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2088 __ ret(0); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2089 } |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2090 { |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2091 StubCodeMark mark(this, "StubRoutines", "log10"); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2092 StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2093 |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2094 __ fld_d(Address(rsp, 4)); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2095 __ flog10(); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2096 __ ret(0); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2097 } |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2098 { |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2099 StubCodeMark mark(this, "StubRoutines", "sin"); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2100 StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2101 |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2102 __ fld_d(Address(rsp, 4)); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
// sin, cos and tan share trigfunc(); the character argument selects the function
2103 __ trigfunc('s'); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2104 __ ret(0); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2105 } |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2106 { |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2107 StubCodeMark mark(this, "StubRoutines", "cos"); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2108 StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2109 |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2110 __ fld_d(Address(rsp, 4)); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2111 __ trigfunc('c'); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2112 __ ret(0); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2113 } |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2114 { |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2115 StubCodeMark mark(this, "StubRoutines", "tan"); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2116 StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2117 |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2118 __ fld_d(Address(rsp, 4)); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2119 __ trigfunc('t'); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2120 __ ret(0); |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2121 } |
6084
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2122 { |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2123 StubCodeMark mark(this, "StubRoutines", "exp"); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2124 StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc(); |
1174
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2125 |
6084
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2126 __ fld_d(Address(rsp, 4)); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2127 __ exp_with_fallback(0); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2128 __ ret(0); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2129 } |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2130 { |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2131 StubCodeMark mark(this, "StubRoutines", "pow"); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
// pow is the only two-argument stub: the entry is typed double (*)(double,double)
2132 StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2133 |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
// the value at rsp+12 (presumably the second argument) is loaded first,
// then the value at rsp+4 (presumably the first argument)
2134 __ fld_d(Address(rsp, 12)); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2135 __ fld_d(Address(rsp, 4)); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2136 __ pow_with_fallback(0); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2137 __ ret(0); |
6759698e3140
7133857: exp() and pow() should use the x87 ISA on x86
roland
parents:
4771
diff
changeset
|
2138 } |
1174
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2139 } |
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
2140 |
6894 | 2141 // AES intrinsic stubs |
// Size of one AES block in bytes; the CBC loops in this file advance the byte
// position and decrement the remaining length by this amount per iteration.
2142 enum {AESBlockSize = 16}; | |
2143 | |
// Emits a 16-byte-aligned block of four data words and returns the address at
// which it starts. Presumably this is the constant later published through
// StubRoutines::x86::key_shuffle_mask_addr() and used as the pshufb mask in
// load_key() - confirm against the stub initialization code.
2144 address generate_key_shuffle_mask() { | |
2145 __ align(16); | |
2146 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); | |
2147 address start = __ pc(); | |
// NOTE(review): with little-endian emission these words presumably form a mask
// that reverses the byte order inside each 32-bit lane - confirm.
2148 __ emit_data(0x00010203, relocInfo::none, 0 ); | |
2149 __ emit_data(0x04050607, relocInfo::none, 0 ); | |
2150 __ emit_data(0x08090a0b, relocInfo::none, 0 ); | |
2151 __ emit_data(0x0c0d0e0f, relocInfo::none, 0 ); | |
2152 return start; | |
2153 } | |
2154 | |
2155 // Utility routine for loading a 128-bit key word in little endian format | |
2156 // can optionally specify that the shuffle mask is already in an xmmregister | |
//
//   xmmdst        - receives the shuffled key word
//   key, offset   - the key word is loaded from Address(key, offset) with movdqu
//   xmm_shuf_mask - register holding the shuffle mask; when left at its NULL
//                   default the mask is taken from
//                   StubRoutines::x86::key_shuffle_mask_addr() instead
2157 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { | |
2158 __ movdqu(xmmdst, Address(key, offset)); | |
2159 if (xmm_shuf_mask != NULL) { | |
2160 __ pshufb(xmmdst, xmm_shuf_mask); | |
2161 } else { | |
// no mask register supplied: shuffle against the in-memory mask
2162 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |
2163 } | |
2164 } | |
2165 | |
2166 // aesenc using specified key+offset | |
2167 // can optionally specify that the shuffle mask is already in an xmmregister | |
// The key word is first loaded into xmmtmp via load_key() (overwriting xmmtmp),
// then one aesenc is applied to xmmdst with it.
2168 void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { | |
2169 load_key(xmmtmp, key, offset, xmm_shuf_mask); | |
2170 __ aesenc(xmmdst, xmmtmp); | |
2171 } | |
2172 | |
2173 // aesdec using specified key+offset | |
2174 // can optionally specify that the shuffle mask is already in an xmmregister | |
// The key word is first loaded into xmmtmp via load_key() (overwriting xmmtmp),
// then one aesdec is applied to xmmdst with it.
2175 void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { | |
2176 load_key(xmmtmp, key, offset, xmm_shuf_mask); | |
2177 __ aesdec(xmmdst, xmmtmp); | |
2178 } | |
2179 | |
2180 | |
2181 // Arguments: | |
2182 // | |
2183 // Inputs: | |
2184 // c_rarg0 - source byte array address | |
2185 // c_rarg1 - destination byte array address | |
2186 // c_rarg2 - K (key) in little endian int array | |
2187 // | |
// Note: in this stub the three inputs listed above are not read from registers;
// they are loaded from the frame through rbp (from_param, to_param, key_param).
//
// Generates the stub that encrypts one 16-byte block: the block is loaded from
// the source address, run through the AES rounds selected by the key array
// length, stored to the destination address, and rax is cleared before return.
// Returns the start address of the generated code.
2188 address generate_aescrypt_encryptBlock() { | |
7427 | 2189 assert(UseAES, "need AES instructions and misaligned SSE support"); |
6894 | 2190 __ align(CodeEntryAlignment); |
2191 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); | |
2192 Label L_doLast; | |
2193 address start = __ pc(); | |
2194 | |
// from and to deliberately share rdx: the input block is loaded into xmm_result
// before rdx is reloaded with the destination address further down.
7427 | 2195 const Register from = rdx; // source array address |
6894 | 2196 const Register to = rdx; // destination array address |
2197 const Register key = rcx; // key array address | |
2198 const Register keylen = rax; | |
2199 const Address from_param(rbp, 8+0); | |
2200 const Address to_param (rbp, 8+4); | |
2201 const Address key_param (rbp, 8+8); | |
2202 | |
2203 const XMMRegister xmm_result = xmm0; | |
7427 | 2204 const XMMRegister xmm_key_shuf_mask = xmm1; |
2205 const XMMRegister xmm_temp1 = xmm2; | |
2206 const XMMRegister xmm_temp2 = xmm3; | |
2207 const XMMRegister xmm_temp3 = xmm4; | |
2208 const XMMRegister xmm_temp4 = xmm5; | |
2209 | |
2210 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
2211 __ movptr(from, from_param); | |
2212 __ movptr(key, key_param); | |
2213 | |
2214 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} | |
// the length is read at (length_offset - base_offset) relative to key, so key
// presumably points at the first int element of the array - confirm
6894 | 2215 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2216 | |
2217 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |
2218 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input | |
// from is no longer needed; rdx now becomes the destination pointer
7427 | 2219 __ movptr(to, to_param); |
6894 | 2220 |
2221 // For encryption, the java expanded key ordering is just what we need | |
2222 | |
7427 | 2223 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); |
2224 __ pxor(xmm_result, xmm_temp1); | |
2225 | |
2226 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); | |
2227 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); | |
2228 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); | |
2229 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); | |
2230 | |
2231 __ aesenc(xmm_result, xmm_temp1); | |
2232 __ aesenc(xmm_result, xmm_temp2); | |
2233 __ aesenc(xmm_result, xmm_temp3); | |
2234 __ aesenc(xmm_result, xmm_temp4); | |
2235 | |
2236 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); | |
2237 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); | |
2238 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); | |
2239 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); | |
2240 | |
2241 __ aesenc(xmm_result, xmm_temp1); | |
2242 __ aesenc(xmm_result, xmm_temp2); | |
2243 __ aesenc(xmm_result, xmm_temp3); | |
2244 __ aesenc(xmm_result, xmm_temp4); | |
2245 | |
2246 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); | |
2247 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); | |
2248 | |
// keylen == 44: keys 0x90/0xa0 in xmm_temp1/xmm_temp2 are the final pair
2249 __ cmpl(keylen, 44); | |
2250 __ jccb(Assembler::equal, L_doLast); | |
2251 | |
2252 __ aesenc(xmm_result, xmm_temp1); | |
2253 __ aesenc(xmm_result, xmm_temp2); | |
2254 | |
2255 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); | |
2256 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); | |
2257 | |
// keylen == 52: keys 0xb0/0xc0 are the final pair
2258 __ cmpl(keylen, 52); | |
2259 __ jccb(Assembler::equal, L_doLast); | |
2260 | |
2261 __ aesenc(xmm_result, xmm_temp1); | |
2262 __ aesenc(xmm_result, xmm_temp2); | |
2263 | |
// otherwise fall through with keys 0xd0/0xe0 as the final pair
2264 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); | |
2265 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); | |
6894 | 2266 |
// common tail: last two rounds use whichever key pair is in xmm_temp1/xmm_temp2
2267 __ BIND(L_doLast); | |
7427 | 2268 __ aesenc(xmm_result, xmm_temp1); |
2269 __ aesenclast(xmm_result, xmm_temp2); | |
6894 | 2270 __ movdqu(Address(to, 0), xmm_result); // store the result |
2271 __ xorptr(rax, rax); // return 0 | |
2272 __ leave(); // required for proper stackwalking of RuntimeStub frame | |
2273 __ ret(0); | |
2274 | |
2275 return start; | |
2276 } | |
2277 | |
2278 | |
2279 // Arguments: | |
2280 // | |
2281 // Inputs: | |
2282 // c_rarg0 - source byte array address | |
2283 // c_rarg1 - destination byte array address | |
2284 // c_rarg2 - K (key) in little endian int array | |
2285 // | |
// Note: in this stub the three inputs listed above are not read from registers;
// they are loaded from the frame through rbp (from_param, to_param, key_param).
//
// Generates the stub that decrypts one 16-byte block: the block is loaded from
// the source address, run through the AES decryption rounds selected by the key
// array length, stored to the destination address, and rax is cleared before
// return. Returns the start address of the generated code.
2286 address generate_aescrypt_decryptBlock() { | |
7427 | 2287 assert(UseAES, "need AES instructions and misaligned SSE support"); |
6894 | 2288 __ align(CodeEntryAlignment); |
2289 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); | |
2290 Label L_doLast; | |
2291 address start = __ pc(); | |
2292 | |
// from and to deliberately share rdx: the input block is loaded into xmm_result
// before rdx is reloaded with the destination address further down.
7427 | 2293 const Register from = rdx; // source array address |
6894 | 2294 const Register to = rdx; // destination array address |
2295 const Register key = rcx; // key array address | |
2296 const Register keylen = rax; | |
2297 const Address from_param(rbp, 8+0); | |
2298 const Address to_param (rbp, 8+4); | |
2299 const Address key_param (rbp, 8+8); | |
2300 | |
2301 const XMMRegister xmm_result = xmm0; | |
7427 | 2302 const XMMRegister xmm_key_shuf_mask = xmm1; |
2303 const XMMRegister xmm_temp1 = xmm2; | |
2304 const XMMRegister xmm_temp2 = xmm3; | |
2305 const XMMRegister xmm_temp3 = xmm4; | |
2306 const XMMRegister xmm_temp4 = xmm5; | |
6894 | 2307 |
2308 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
7427 | 2309 __ movptr(from, from_param); |
2310 __ movptr(key, key_param); | |
2311 | |
2312 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} | |
// the length is read at (length_offset - base_offset) relative to key, so key
// presumably points at the first int element of the array - confirm
6894 | 2313 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); |
2314 | |
2315 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |
2316 __ movdqu(xmm_result, Address(from, 0)); | |
// from is no longer needed; rdx now becomes the destination pointer
7427 | 2317 __ movptr(to, to_param); |
6894 | 2318 |
2319 // for decryption java expanded key ordering is rotated one position from what we want | |
2320 // so we start from 0x10 here and hit 0x00 last | |
2321 // we don't know if the key is aligned, hence not using load-execute form | |
7427 | 2322 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); |
2323 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); | |
2324 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); | |
2325 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); | |
2326 | |
// the key at 0x10 is applied with a plain xor; the following keys use aesdec
2327 __ pxor (xmm_result, xmm_temp1); | |
2328 __ aesdec(xmm_result, xmm_temp2); | |
2329 __ aesdec(xmm_result, xmm_temp3); | |
2330 __ aesdec(xmm_result, xmm_temp4); | |
2331 | |
2332 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); | |
2333 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); | |
2334 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); | |
2335 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); | |
2336 | |
2337 __ aesdec(xmm_result, xmm_temp1); | |
2338 __ aesdec(xmm_result, xmm_temp2); | |
2339 __ aesdec(xmm_result, xmm_temp3); | |
2340 __ aesdec(xmm_result, xmm_temp4); | |
2341 | |
2342 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); | |
2343 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); | |
// key 0x00 is parked in xmm_temp3 for the aesdeclast at the end of every path
2344 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); | |
2345 | |
// keylen == 44: keys 0x90/0xa0 in xmm_temp1/xmm_temp2 are the final aesdec pair
2346 __ cmpl(keylen, 44); | |
2347 __ jccb(Assembler::equal, L_doLast); | |
2348 | |
2349 __ aesdec(xmm_result, xmm_temp1); | |
2350 __ aesdec(xmm_result, xmm_temp2); | |
2351 | |
2352 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); | |
2353 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); | |
2354 | |
// keylen == 52: keys 0xb0/0xc0 are the final aesdec pair
2355 __ cmpl(keylen, 52); | |
2356 __ jccb(Assembler::equal, L_doLast); | |
2357 | |
2358 __ aesdec(xmm_result, xmm_temp1); | |
2359 __ aesdec(xmm_result, xmm_temp2); | |
2360 | |
// otherwise fall through with keys 0xd0/0xe0 as the final aesdec pair
2361 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); | |
2362 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); | |
6894 | 2363 |
2364 __ BIND(L_doLast); | |
7427 | 2365 __ aesdec(xmm_result, xmm_temp1); |
2366 __ aesdec(xmm_result, xmm_temp2); | |
2367 | |
6894 | 2368 // for decryption the aesdeclast operation is always on key+0x00 |
7427 | 2369 __ aesdeclast(xmm_result, xmm_temp3); |
6894 | 2370 __ movdqu(Address(to, 0), xmm_result); // store the result |
2371 __ xorptr(rax, rax); // return 0 | |
2372 __ leave(); // required for proper stackwalking of RuntimeStub frame | |
2373 __ ret(0); | |
2374 | |
2375 return start; | |
2376 } | |
2377 | |
// Emits code that saves (saving == true) or restores (saving == false) rsi, rdi
// and rbx in slots addressed relative to rbp. The slots sit at negative offsets
// from rbp, so the emitted code relies on rbp already being the frame pointer;
// the CBC stubs in this file call enter() before the saving form.
// The restoring form only reloads the registers and does not adjust rsp; the
// callers in this file follow it with leave().
2378 void handleSOERegisters(bool saving) { | |
// NOTE(review): four words are reserved but only three slots are used below -
// confirm whether the extra word is intentional (e.g. alignment).
2379 const int saveFrameSizeInBytes = 4 * wordSize; | |
2380 const Address saved_rbx (rbp, -3 * wordSize); | |
2381 const Address saved_rsi (rbp, -2 * wordSize); | |
2382 const Address saved_rdi (rbp, -1 * wordSize); | |
2383 | |
2384 if (saving) { | |
// make room below rbp, then store the three registers
2385 __ subptr(rsp, saveFrameSizeInBytes); | |
2386 __ movptr(saved_rsi, rsi); | |
2387 __ movptr(saved_rdi, rdi); | |
2388 __ movptr(saved_rbx, rbx); | |
2389 } else { | |
2390 // restoring | |
2391 __ movptr(rsi, saved_rsi); | |
2392 __ movptr(rdi, saved_rdi); | |
2393 __ movptr(rbx, saved_rbx); | |
2394 } | |
2395 } | |
2396 | |
2397 // Arguments: | |
2398 // | |
2399 // Inputs: | |
2400 // c_rarg0 - source byte array address | |
2401 // c_rarg1 - destination byte array address | |
2402 // c_rarg2 - K (key) in little endian int array | |
2403 // c_rarg3 - r vector byte array address | |
2404 // c_rarg4 - input length | |
2405 // | |
17670 | 2406 // Output: |
2407 // rax - input length | |
2408 // | |
// Note: in this stub the five inputs listed above are not read from registers;
// they are loaded from the frame through rbp (from_param .. len_param).
//
// Generates the CBC encryption stub. After a common prologue the code splits
// into three loops selected by the key array length (44, 52, otherwise the
// 256-bit path). Each loop xors the next input block into the running r vector,
// applies the AES rounds and stores the block; all three share the exit sequence
// at L_exit, which writes the final r vector back and returns the length in rax.
// Returns the start address of the generated code.
6894 | 2409 address generate_cipherBlockChaining_encryptAESCrypt() { |
7427 | 2410 assert(UseAES, "need AES instructions and misaligned SSE support"); |
6894 | 2411 __ align(CodeEntryAlignment); |
2412 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); | |
2413 address start = __ pc(); | |
2414 | |
2415 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; | |
2416 const Register from = rsi; // source array address | |
2417 const Register to = rdx; // destination array address | |
2418 const Register key = rcx; // key array address | |
2419 const Register rvec = rdi; // r byte array initialized from initvector array address | |
2420 // and left with the results of the last encryption block | |
2421 const Register len_reg = rbx; // src len (must be multiple of blocksize 16) | |
// pos lives in rax; rax is first used to hold the key array length for the
// key-size dispatch and is only zeroed as pos at the top of each loop variant
2422 const Register pos = rax; | |
2423 | |
2424 // xmm register assignments for the loops below | |
2425 const XMMRegister xmm_result = xmm0; | |
2426 const XMMRegister xmm_temp = xmm1; | |
2427 // first 6 keys preloaded into xmm2-xmm7 | |
2428 const int XMM_REG_NUM_KEY_FIRST = 2; | |
2429 const int XMM_REG_NUM_KEY_LAST = 7; | |
2430 const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); | |
2431 | |
2432 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
2433 handleSOERegisters(true /*saving*/); | |
2434 | |
2435 // load registers from incoming parameters | |
2436 const Address from_param(rbp, 8+0); | |
2437 const Address to_param (rbp, 8+4); | |
2438 const Address key_param (rbp, 8+8); | |
2439 const Address rvec_param (rbp, 8+12); | |
2440 const Address len_param (rbp, 8+16); | |
2441 __ movptr(from , from_param); | |
2442 __ movptr(to , to_param); | |
2443 __ movptr(key , key_param); | |
2444 __ movptr(rvec , rvec_param); | |
2445 __ movptr(len_reg , len_param); | |
2446 | |
// the mask aliases xmm_temp, so it is only valid until the loops overwrite
// xmm_temp; the load_key/aes_enc_key calls inside the loops pass no mask
// register and therefore use the in-memory mask
2447 const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front | |
2448 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |
2449 // load up xmm regs 2 thru 7 with keys 0-5 | |
2450 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2451 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); | |
2452 offset += 0x10; | |
2453 } | |
2454 | |
2455 __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec | |
2456 | |
2457 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) | |
// a length of 44 falls through to the 128-bit loop below
2458 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | |
2459 __ cmpl(rax, 44); | |
2460 __ jcc(Assembler::notEqual, L_key_192_256); | |
2461 | |
2462 // 128 bit code follows here | |
7427 | 2463 __ movl(pos, 0); |
6894 | 2464 __ align(OptoLoopAlignment); |
2465 __ BIND(L_loopTop_128); | |
2466 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input | |
2467 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | |
2468 | |
2469 __ pxor (xmm_result, xmm_key0); // do the aes rounds | |
// keys 0x10..0x50 come from the preloaded registers xmm3..xmm7
2470 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2471 __ aesenc(xmm_result, as_XMMRegister(rnum)); | |
2472 } | |
// the remaining keys are reloaded from memory on every iteration
2473 for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { | |
2474 aes_enc_key(xmm_result, xmm_temp, key, key_offset); | |
2475 } | |
2476 load_key(xmm_temp, key, 0xa0); | |
2477 __ aesenclast(xmm_result, xmm_temp); | |
2478 | |
2479 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output | |
2480 // no need to store r to memory until we exit | |
2481 __ addptr(pos, AESBlockSize); | |
// NOTE(review): the body runs before the length is tested and the loop exits
// only when len_reg reaches exactly zero, so a zero or non-multiple-of-16
// length is not handled here; this relies on the caller (same for the 192/256 loops).
2482 __ subptr(len_reg, AESBlockSize); | |
2483 __ jcc(Assembler::notEqual, L_loopTop_128); | |
2484 | |
// shared exit for all three key sizes
2485 __ BIND(L_exit); | |
2486 __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object | |
2487 | |
2488 handleSOERegisters(false /*restoring*/); | |
17670 | 2489 __ movptr(rax, len_param); // return length |
6894 | 2490 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2491 __ ret(0); | |
2492 | |
7427 | 2493 __ BIND(L_key_192_256); |
2494 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) | |
6894 | 2495 __ cmpl(rax, 52); |
2496 __ jcc(Assembler::notEqual, L_key_256); | |
2497 | |
2498 // 192-bit code follows here (could be changed to use more xmm registers) | |
7427 | 2499 __ movl(pos, 0); |
2500 __ align(OptoLoopAlignment); | |
2501 __ BIND(L_loopTop_192); | |
6894 | 2502 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2503 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | |
2504 | |
2505 __ pxor (xmm_result, xmm_key0); // do the aes rounds | |
2506 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2507 __ aesenc(xmm_result, as_XMMRegister(rnum)); | |
2508 } | |
2509 for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { | |
2510 aes_enc_key(xmm_result, xmm_temp, key, key_offset); | |
2511 } | |
2512 load_key(xmm_temp, key, 0xc0); | |
2513 __ aesenclast(xmm_result, xmm_temp); | |
2514 | |
2515 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output | |
2516 // no need to store r to memory until we exit | |
2517 __ addptr(pos, AESBlockSize); | |
2518 __ subptr(len_reg, AESBlockSize); | |
2519 __ jcc(Assembler::notEqual, L_loopTop_192); | |
2520 __ jmp(L_exit); | |
2521 | |
7427 | 2522 __ BIND(L_key_256); |
6894 | 2523 // 256-bit code follows here (could be changed to use more xmm registers) |
7427 | 2524 __ movl(pos, 0); |
2525 __ align(OptoLoopAlignment); | |
2526 __ BIND(L_loopTop_256); | |
6894 | 2527 __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input |
2528 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | |
2529 | |
2530 __ pxor (xmm_result, xmm_key0); // do the aes rounds | |
2531 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2532 __ aesenc(xmm_result, as_XMMRegister(rnum)); | |
2533 } | |
2534 for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { | |
2535 aes_enc_key(xmm_result, xmm_temp, key, key_offset); | |
2536 } | |
2537 load_key(xmm_temp, key, 0xe0); | |
2538 __ aesenclast(xmm_result, xmm_temp); | |
2539 | |
2540 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output | |
2541 // no need to store r to memory until we exit | |
2542 __ addptr(pos, AESBlockSize); | |
2543 __ subptr(len_reg, AESBlockSize); | |
2544 __ jcc(Assembler::notEqual, L_loopTop_256); | |
2545 __ jmp(L_exit); | |
2546 | |
2547 return start; | |
2548 } | |
2549 | |
2550 | |
2551 // CBC AES Decryption. | |
2552 // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. | |
2553 // | |
2554 // Arguments: | |
2555 // | |
2556 // Inputs: | |
2557 // c_rarg0 - source byte array address | |
2558 // c_rarg1 - destination byte array address | |
2559 // c_rarg2 - K (key) in little endian int array | |
2560 // c_rarg3 - r vector byte array address | |
2561 // c_rarg4 - input length | |
2562 // | |
17670 | 2563 // Output: |
2564 // rax - input length | |
2565 // | |
6894 | 2566 |
2567 address generate_cipherBlockChaining_decryptAESCrypt() { | |
7427 | 2568 assert(UseAES, "need AES instructions and misaligned SSE support"); |
6894 | 2569 __ align(CodeEntryAlignment); |
2570 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); | |
2571 address start = __ pc(); | |
2572 | |
2573 Label L_exit, L_key_192_256, L_key_256; | |
2574 Label L_singleBlock_loopTop_128; | |
2575 Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256; | |
2576 const Register from = rsi; // source array address | |
2577 const Register to = rdx; // destination array address | |
2578 const Register key = rcx; // key array address | |
2579 const Register rvec = rdi; // r byte array initialized from initvector array address | |
2580 // and left with the results of the last encryption block | |
2581 const Register len_reg = rbx; // src len (must be multiple of blocksize 16) | |
2582 const Register pos = rax; | |
2583 | |
2584 // xmm register assignments for the loops below | |
2585 const XMMRegister xmm_result = xmm0; | |
2586 const XMMRegister xmm_temp = xmm1; | |
2587 // first 6 keys preloaded into xmm2-xmm7 | |
2588 const int XMM_REG_NUM_KEY_FIRST = 2; | |
2589 const int XMM_REG_NUM_KEY_LAST = 7; | |
2590 const int FIRST_NON_REG_KEY_offset = 0x70; | |
2591 const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); | |
2592 | |
2593 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
2594 handleSOERegisters(true /*saving*/); | |
2595 | |
2596 // load registers from incoming parameters | |
2597 const Address from_param(rbp, 8+0); | |
2598 const Address to_param (rbp, 8+4); | |
2599 const Address key_param (rbp, 8+8); | |
2600 const Address rvec_param (rbp, 8+12); | |
2601 const Address len_param (rbp, 8+16); | |
2602 __ movptr(from , from_param); | |
2603 __ movptr(to , to_param); | |
2604 __ movptr(key , key_param); | |
2605 __ movptr(rvec , rvec_param); | |
2606 __ movptr(len_reg , len_param); | |
2607 | |
2608 // the java expanded key ordering is rotated one position from what we want | |
2609 // so we start from 0x10 here and hit 0x00 last | |
2610 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front | |
2611 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); | |
2612 // load up xmm regs 2 thru 6 with first 5 keys | |
2613 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2614 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); | |
2615 offset += 0x10; | |
2616 } | |
2617 | |
2618 // inside here, use the rvec register to point to previous block cipher | |
2619 // with which we xor at the end of each newly decrypted block | |
2620 const Register prev_block_cipher_ptr = rvec; | |
2621 | |
2622 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) | |
2623 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); | |
2624 __ cmpl(rax, 44); | |
2625 __ jcc(Assembler::notEqual, L_key_192_256); | |
2626 | |
2627 | |
2628 // 128-bit code follows here, parallelized | |
7427 | 2629 __ movl(pos, 0); |
2630 __ align(OptoLoopAlignment); | |
2631 __ BIND(L_singleBlock_loopTop_128); | |
6894 | 2632 __ cmpptr(len_reg, 0); // any blocks left?? |
2633 __ jcc(Assembler::equal, L_exit); | |
2634 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | |
2635 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | |
2636 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2637 __ aesdec(xmm_result, as_XMMRegister(rnum)); | |
2638 } | |
2639 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0 | |
2640 aes_dec_key(xmm_result, xmm_temp, key, key_offset); | |
2641 } | |
2642 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 | |
2643 __ aesdeclast(xmm_result, xmm_temp); | |
2644 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); | |
2645 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | |
2646 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output | |
2647 // no need to store r to memory until we exit | |
2648 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr | |
2649 __ addptr(pos, AESBlockSize); | |
2650 __ subptr(len_reg, AESBlockSize); | |
2651 __ jmp(L_singleBlock_loopTop_128); | |
2652 | |
2653 | |
2654 __ BIND(L_exit); | |
2655 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); | |
2656 __ movptr(rvec , rvec_param); // restore this since used in loop | |
2657 __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object | |
2658 handleSOERegisters(false /*restoring*/); | |
17670 | 2659 __ movptr(rax, len_param); // return length |
6894 | 2660 __ leave(); // required for proper stackwalking of RuntimeStub frame |
2661 __ ret(0); | |
2662 | |
2663 | |
2664 __ BIND(L_key_192_256); | |
2665 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) | |
2666 __ cmpl(rax, 52); | |
2667 __ jcc(Assembler::notEqual, L_key_256); | |
2668 | |
2669 // 192-bit code follows here (could be optimized to use parallelism) | |
7427 | 2670 __ movl(pos, 0); |
6894 | 2671 __ align(OptoLoopAlignment); |
2672 __ BIND(L_singleBlock_loopTop_192); | |
2673 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | |
2674 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | |
2675 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2676 __ aesdec(xmm_result, as_XMMRegister(rnum)); | |
2677 } | |
2678 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0 | |
2679 aes_dec_key(xmm_result, xmm_temp, key, key_offset); | |
2680 } | |
2681 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 | |
2682 __ aesdeclast(xmm_result, xmm_temp); | |
2683 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); | |
2684 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | |
2685 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output | |
2686 // no need to store r to memory until we exit | |
2687 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr | |
2688 __ addptr(pos, AESBlockSize); | |
2689 __ subptr(len_reg, AESBlockSize); | |
2690 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); | |
2691 __ jmp(L_exit); | |
2692 | |
2693 __ BIND(L_key_256); | |
2694 // 256-bit code follows here (could be optimized to use parallelism) | |
7427 | 2695 __ movl(pos, 0); |
6894 | 2696 __ align(OptoLoopAlignment); |
2697 __ BIND(L_singleBlock_loopTop_256); | |
2698 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input | |
2699 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds | |
2700 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { | |
2701 __ aesdec(xmm_result, as_XMMRegister(rnum)); | |
2702 } | |
2703 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0 | |
2704 aes_dec_key(xmm_result, xmm_temp, key, key_offset); | |
2705 } | |
2706 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 | |
2707 __ aesdeclast(xmm_result, xmm_temp); | |
2708 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); | |
2709 __ pxor (xmm_result, xmm_temp); // xor with the current r vector | |
2710 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output | |
2711 // no need to store r to memory until we exit | |
2712 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr | |
2713 __ addptr(pos, AESBlockSize); | |
2714 __ subptr(len_reg, AESBlockSize); | |
2715 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); | |
2716 __ jmp(L_exit); | |
2717 | |
2718 return start; | |
2719 } | |
2720 | |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2721 /** |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2722 * Arguments: |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2723 * |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2724 * Inputs: |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2725 * rsp(4) - int crc |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2726 * rsp(8) - byte* buf |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2727 * rsp(12) - int length |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2728 * |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2729 * Ouput: |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2730 * rax - int crc result |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2731 */ |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2732 address generate_updateBytesCRC32() { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2733 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2734 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2735 __ align(CodeEntryAlignment); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2736 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2737 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2738 address start = __ pc(); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2739 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2740 const Register crc = rdx; // crc |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2741 const Register buf = rsi; // source java byte array address |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2742 const Register len = rcx; // length |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2743 const Register table = rdi; // crc_table address (reuse register) |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2744 const Register tmp = rbx; |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2745 assert_different_registers(crc, buf, len, table, tmp, rax); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2746 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2747 BLOCK_COMMENT("Entry:"); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2748 __ enter(); // required for proper stackwalking of RuntimeStub frame |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2749 __ push(rsi); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2750 __ push(rdi); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2751 __ push(rbx); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2752 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2753 Address crc_arg(rbp, 8 + 0); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2754 Address buf_arg(rbp, 8 + 4); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2755 Address len_arg(rbp, 8 + 8); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2756 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2757 // Load up: |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2758 __ movl(crc, crc_arg); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2759 __ movptr(buf, buf_arg); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2760 __ movl(len, len_arg); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2761 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2762 __ kernel_crc32(crc, buf, len, table, tmp); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2763 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2764 __ movl(rax, crc); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2765 __ pop(rbx); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2766 __ pop(rdi); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2767 __ pop(rsi); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2768 __ leave(); // required for proper stackwalking of RuntimeStub frame |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2769 __ ret(0); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2770 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2771 return start; |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2772 } |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2773 |
11127 | 2774 // Safefetch stubs. |
2775 void generate_safefetch(const char* name, int size, address* entry, | |
2776 address* fault_pc, address* continuation_pc) { | |
2777 // safefetch signatures: | |
2778 // int SafeFetch32(int* adr, int errValue); | |
2779 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); | |
2780 | |
2781 StubCodeMark mark(this, "StubRoutines", name); | |
2782 | |
2783 // Entry point, pc or function descriptor. | |
2784 *entry = __ pc(); | |
2785 | |
2786 __ movl(rax, Address(rsp, 0x8)); | |
2787 __ movl(rcx, Address(rsp, 0x4)); | |
2788 // Load *adr into eax, may fault. | |
2789 *fault_pc = __ pc(); | |
2790 switch (size) { | |
2791 case 4: | |
2792 // int32_t | |
2793 __ movl(rax, Address(rcx, 0)); | |
2794 break; | |
2795 case 8: | |
2796 // int64_t | |
2797 Unimplemented(); | |
2798 break; | |
2799 default: | |
2800 ShouldNotReachHere(); | |
2801 } | |
2802 | |
2803 // Return errValue or *adr. | |
2804 *continuation_pc = __ pc(); | |
2805 __ ret(0); | |
2806 } | |
6894 | 2807 |
0 | 2808 public: |
// Frame layout (in words, counted up from rsp) at the time of the blocking
// runtime call made by generate_throw_exception.
// Only callee-saved registers have to be preserved here: the compilers are
// responsible for supplying a continuation point if they expect all
// registers to be preserved.
enum layout {
  thread_off = 0,   // slot at last_java_sp; holds the thread argument
  arg1_off   = 1,   // optional first register argument
  arg2_off   = 2,   // optional second register argument
  rbp_off    = 3,   // callee saved register
  ret_pc     = 4,   // return address
  framesize  = 5    // total number of words in the frame
};
2821 | |
2822 private: | |
2823 | |
2824 #undef __ | |
2825 #define __ masm-> | |
2826 | |
2827 //------------------------------------------------------------------------------------------------------------------------ | |
2828 // Continuation point for throwing of implicit exceptions that are not handled in | |
2829 // the current activation. Fabricates an exception oop and initiates normal | |
2830 // exception dispatching in this frame. | |
2831 // | |
2832 // Previously the compiler (c2) allowed for callee save registers on Java calls. | |
2833 // This is no longer true after adapter frames were removed but could possibly | |
2834 // be brought back in the future if the interpreter code was reworked and it | |
2835 // was deemed worthwhile. The comment below was left to describe what must | |
2836 // happen here if callee saves were resurrected. As it stands now this stub | |
2837 // could actually be a vanilla BufferBlob and have now oopMap at all. | |
2838 // Since it doesn't make much difference we've chosen to leave it the | |
2839 // way it was in the callee save days and keep the comment. | |
2840 | |
2841 // If we need to preserve callee-saved values we need a callee-saved oop map and | |
2842 // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs. | |
2843 // If the compiler needs all registers to be preserved between the fault | |
2844 // point and the exception handler then it must assume responsibility for that in | |
2845 // AbstractCompiler::continuation_for_implicit_null_exception or | |
2846 // continuation_for_implicit_division_by_zero_exception. All other implicit | |
2847 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are | |
2848 // either at call sites or otherwise assume that stack unwinding will be initiated, | |
2849 // so caller saved registers were assumed volatile in the compiler. | |
2850 address generate_throw_exception(const char* name, address runtime_entry, | |
3937 | 2851 Register arg1 = noreg, Register arg2 = noreg) { |
0 | 2852 |
2853 int insts_size = 256; | |
2854 int locs_size = 32; | |
2855 | |
2856 CodeBuffer code(name, insts_size, locs_size); | |
2857 OopMapSet* oop_maps = new OopMapSet(); | |
2858 MacroAssembler* masm = new MacroAssembler(&code); | |
2859 | |
2860 address start = __ pc(); | |
2861 | |
2862 // This is an inlined and slightly modified version of call_VM | |
2863 // which has the ability to fetch the return PC out of | |
2864 // thread-local storage and also sets up last_Java_sp slightly | |
2865 // differently than the real call_VM | |
2866 Register java_thread = rbx; | |
2867 __ get_thread(java_thread); | |
2868 | |
2869 __ enter(); // required for proper stackwalking of RuntimeStub frame | |
2870 | |
2871 // pc and rbp, already pushed | |
304 | 2872 __ subptr(rsp, (framesize-2) * wordSize); // prolog |
0 | 2873 |
2874 // Frame is now completed as far as size and linkage. | |
2875 | |
2876 int frame_complete = __ pc() - start; | |
2877 | |
2878 // push java thread (becomes first argument of C function) | |
304 | 2879 __ movptr(Address(rsp, thread_off * wordSize), java_thread); |
3781
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2880 if (arg1 != noreg) { |
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2881 __ movptr(Address(rsp, arg1_off * wordSize), arg1); |
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2882 } |
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2883 if (arg2 != noreg) { |
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2884 assert(arg1 != noreg, "missing reg arg"); |
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2885 __ movptr(Address(rsp, arg2_off * wordSize), arg2); |
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2886 } |
0 | 2887 |
2888 // Set up last_Java_sp and last_Java_fp | |
2889 __ set_last_Java_frame(java_thread, rsp, rbp, NULL); | |
2890 | |
2891 // Call runtime | |
2892 BLOCK_COMMENT("call runtime_entry"); | |
2893 __ call(RuntimeAddress(runtime_entry)); | |
2894 // Generate oop map | |
2895 OopMap* map = new OopMap(framesize, 0); | |
2896 oop_maps->add_gc_map(__ pc() - start, map); | |
2897 | |
2898 // restore the thread (cannot use the pushed argument since arguments | |
2899 // may be overwritten by C code generated by an optimizing compiler); | |
2900 // however can use the register value directly if it is callee saved. | |
2901 __ get_thread(java_thread); | |
2902 | |
2903 __ reset_last_Java_frame(java_thread, true, false); | |
2904 | |
2905 __ leave(); // required for proper stackwalking of RuntimeStub frame | |
2906 | |
2907 // check for pending exceptions | |
2908 #ifdef ASSERT | |
2909 Label L; | |
304 | 2910 __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); |
0 | 2911 __ jcc(Assembler::notEqual, L); |
2912 __ should_not_reach_here(); | |
2913 __ bind(L); | |
2914 #endif /* ASSERT */ | |
2915 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); | |
2916 | |
2917 | |
2918 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false); | |
2919 return stub->entry_point(); | |
2920 } | |
2921 | |
2922 | |
2923 void create_control_words() { | |
2924 // Round to nearest, 53-bit mode, exceptions masked | |
2925 StubRoutines::_fpu_cntrl_wrd_std = 0x027F; | |
2926 // Round to zero, 53-bit mode, exception mased | |
2927 StubRoutines::_fpu_cntrl_wrd_trunc = 0x0D7F; | |
2928 // Round to nearest, 24-bit mode, exceptions masked | |
2929 StubRoutines::_fpu_cntrl_wrd_24 = 0x007F; | |
2930 // Round to nearest, 64-bit mode, exceptions masked | |
2931 StubRoutines::_fpu_cntrl_wrd_64 = 0x037F; | |
2932 // Round to nearest, 64-bit mode, exceptions masked | |
2933 StubRoutines::_mxcsr_std = 0x1F80; | |
2934 // Note: the following two constants are 80-bit values | |
2935 // layout is critical for correct loading by FPU. | |
2936 // Bias for strict fp multiply/divide | |
2937 StubRoutines::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000 | |
2938 StubRoutines::_fpu_subnormal_bias1[1]= 0x80000000; | |
2939 StubRoutines::_fpu_subnormal_bias1[2]= 0x03ff; | |
2940 // Un-Bias for strict fp multiply/divide | |
2941 StubRoutines::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000 | |
2942 StubRoutines::_fpu_subnormal_bias2[1]= 0x80000000; | |
2943 StubRoutines::_fpu_subnormal_bias2[2]= 0x7bff; | |
2944 } | |
2945 | |
2946 //--------------------------------------------------------------------------- | |
2947 // Initialization | |
2948 | |
2949 void generate_initial() { | |
2950 // Generates all stubs and initializes the entry points | |
2951 | |
2952 //------------------------------------------------------------------------------------------------------------------------ | |
2953 // entry points that exist in all platforms | |
2954 // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than | |
2955 // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. | |
2956 StubRoutines::_forward_exception_entry = generate_forward_exception(); | |
2957 | |
2958 StubRoutines::_call_stub_entry = | |
2959 generate_call_stub(StubRoutines::_call_stub_return_address); | |
2960 // is referenced by megamorphic call | |
2961 StubRoutines::_catch_exception_entry = generate_catch_exception(); | |
2962 | |
2963 // These are currently used by Solaris/Intel | |
2964 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); | |
2965 | |
2966 StubRoutines::_handler_for_unsafe_access_entry = | |
2967 generate_handler_for_unsafe_access(); | |
2968 | |
2969 // platform dependent | |
2970 create_control_words(); | |
2971 | |
304 | 2972 StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); |
2973 StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd(); | |
0 | 2974 StubRoutines::_d2i_wrapper = generate_d2i_wrapper(T_INT, |
2975 CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); | |
2976 StubRoutines::_d2l_wrapper = generate_d2i_wrapper(T_LONG, | |
2977 CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); | |
3781
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2978 |
d83ac25d0304
7055355: JSR 292: crash while throwing WrongMethodTypeException
never
parents:
2324
diff
changeset
|
2979 // Build this early so it's available for the interpreter |
4743 | 2980 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); |
11080
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2981 |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2982 if (UseCRC32Intrinsics) { |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2983 // set table address before stub generation which use it |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2984 StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2985 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); |
b800986664f4
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
drchase
parents:
10973
diff
changeset
|
2986 } |
0 | 2987 } |
2988 | |
2989 | |
2990 void generate_all() { | |
2991 // Generates all stubs and initializes the entry points | |
2992 | |
2993 // These entry points require SharedInfo::stack0 to be set up in non-core builds | |
2994 // and need to be relocatable, so they each fabricate a RuntimeStub internally. | |
3937 | 2995 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); |
2996 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); | |
2997 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); | |
0 | 2998 |
2999 //------------------------------------------------------------------------------------------------------------------------ | |
3000 // entry points that are platform specific | |
3001 | |
3002 // support for verify_oop (must happen after universe_init) | |
3003 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); | |
3004 | |
3005 // arraycopy stubs used by compilers | |
3006 generate_arraycopy_stubs(); | |
710 | 3007 |
1174
ddb7834449d0
6849984: Value methods for platform dependent math functions constant fold incorrectly
never
parents:
845
diff
changeset
|
3008 generate_math_stubs(); |
6894 | 3009 |
3010 // don't bother generating these AES intrinsic stubs unless global flag is set | |
3011 if (UseAESIntrinsics) { | |
3012 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others | |
3013 | |
3014 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); | |
3015 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); | |
3016 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); | |
3017 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); | |
3018 } | |
11127 | 3019 |
3020 // Safefetch stubs. | |
3021 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, | |
3022 &StubRoutines::_safefetch32_fault_pc, | |
3023 &StubRoutines::_safefetch32_continuation_pc); | |
3024 StubRoutines::_safefetchN_entry = StubRoutines::_safefetch32_entry; | |
3025 StubRoutines::_safefetchN_fault_pc = StubRoutines::_safefetch32_fault_pc; | |
3026 StubRoutines::_safefetchN_continuation_pc = StubRoutines::_safefetch32_continuation_pc; | |
0 | 3027 } |
3028 | |
3029 | |
3030 public: | |
3031 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { | |
3032 if (all) { | |
3033 generate_all(); | |
3034 } else { | |
3035 generate_initial(); | |
3036 } | |
3037 } | |
3038 }; // end class declaration | |
3039 | |
3040 | |
3041 void StubGenerator_generate(CodeBuffer* code, bool all) { | |
3042 StubGenerator g(code, all); | |
3043 } |