/*
 * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_stubGenerator_sparc.cpp.incl"

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#define __ _masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Note: The register L7 is used as L7_thread_cache, and may not be used
//       in any other way within this module.


static const Register& Lstub_temp = L2;

// -------------------------------------------------------------------------------------------------------------------------
// Stub Code definitions

static address handle_unsafe_access() {
  JavaThread* thread = JavaThread::current();
  address pc  = thread->saved_exception_pc();
  address npc = thread->saved_exception_npc();
  // pc is the instruction which we must emulate
  // doing a no-op is fine: return garbage from the load

  // request an async exception
  thread->set_pending_unsafe_access_error();

  // return address of next instruction to execute
  return npc;
}
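
// For orientation, a hedged sketch of how this helper fits into the fault
// path (illustrative only -- the real signal-handler side lives elsewhere in
// the VM, and the names below are hypothetical): when an Unsafe load faults,
// the handler records the faulting pc/npc in the JavaThread and redirects
// execution to the unsafe-access handler stub, which calls
// handle_unsafe_access() and resumes at the returned npc, i.e. the
// instruction after the faulting load, with an async error left pending:
//
//   // hypothetical illustration, not VM code
//   void on_unsafe_fault(JavaThread* t, signal_context* uc) {
//     t->set_saved_exception_pc(uc->pc);
//     t->set_saved_exception_npc(uc->npc);
//     uc->pc = StubRoutines::handler_for_unsafe_access();
//   }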

class StubGenerator: public StubCodeGenerator {
 private:

#ifdef PRODUCT
#define inc_counter_np(a,b,c) (0)
#else
  void inc_counter_np_(int& counter, Register t1, Register t2) {
    Address counter_addr(t2, (address) &counter);
    __ sethi(counter_addr);
    __ ld(counter_addr, t1);
    __ inc(t1);
    __ st(t1, counter_addr);
  }
#define inc_counter_np(counter, t1, t2) \
  BLOCK_COMMENT("inc_counter " #counter); \
  inc_counter_np_(counter, t1, t2);
#endif

  //----------------------------------------------------------------------------------------------------
  // Call stubs are used to call Java from C

  address generate_call_stub(address& return_pc) {
    StubCodeMark mark(this, "StubRoutines", "call_stub");
    address start = __ pc();

    // Incoming arguments:
    //
    // o0         : call wrapper address
    // o1         : result (address)
    // o2         : result type
    // o3         : method
    // o4         : (interpreter) entry point
    // o5         : parameters (address)
    // [sp + 0x5c]: parameter size (in words)
    // [sp + 0x60]: thread
    //
    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  param. size  |
    // +---------------+ <--- sp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // note: if the link argument position changes, adjust
    //       the code in frame::entry_frame_call_wrapper()

    const Argument link           = Argument(0, false); // used only for GC
    const Argument result         = Argument(1, false);
    const Argument result_type    = Argument(2, false);
    const Argument method         = Argument(3, false);
    const Argument entry_point    = Argument(4, false);
    const Argument parameters     = Argument(5, false);
    const Argument parameter_size = Argument(6, false);
    const Argument thread         = Argument(7, false);

    // setup thread register
    __ ld_ptr(thread.as_address(), G2_thread);

#ifdef ASSERT
    // make sure we have no pending exceptions
    { const Register t = G3_scratch;
      Label L;
      __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
      __ br_null(t, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::call_stub: entered with pending exception");
      __ bind(L);
    }
#endif

    // create activation frame & allocate space for parameters
    { const Register t = G3_scratch;
      __ ld_ptr(parameter_size.as_address(), t);            // get parameter size (in words)
      __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
      __ round_to(t, WordsPerLong);                         // make sure it is multiple of 2 (in words)
      __ sll(t, Interpreter::logStackElementSize(), t);     // compute number of bytes
      __ neg(t);                                            // negate so it can be used with save
      __ save(SP, t, SP);                                   // setup new frame
    }

    // +---------------+ <--- sp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- sp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- sp + 0x5c
    // |  empty slot   | (only if parameter size is even)
    // +---------------+
    // |               |
    // .  parameters   .
    // |               |
    // +---------------+ <--- fp + 0
    // |               |
    // . reg save area .
    // |               |
    // +---------------+ <--- fp + 0x40
    // |               |
    // . extra 7 slots .
    // |               |
    // +---------------+ <--- fp + 0x5c
    // |  param. size  |
    // +---------------+ <--- fp + 0x60
    // |    thread     |
    // +---------------+
    // |               |

    // pass parameters if any
    BLOCK_COMMENT("pass parameters if any");
    { const Register src = parameters.as_in().as_register();
      const Register dst = Lentry_args;
      const Register tmp = G3_scratch;
      const Register cnt = G4_scratch;

      // test if any parameters & setup of Lentry_args
      Label exit;
      __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
      __ add( FP, STACK_BIAS, dst );
      __ tst(cnt);
      __ br(Assembler::zero, false, Assembler::pn, exit);
      __ delayed()->sub(dst, BytesPerWord, dst);           // setup Lentry_args

      // copy parameters if any
      Label loop;
      __ BIND(loop);
      // Store tag first.
      if (TaggedStackInterpreter) {
        __ ld_ptr(src, 0, tmp);
        __ add(src, BytesPerWord, src);  // get next
        __ st_ptr(tmp, dst, Interpreter::tag_offset_in_bytes());
      }
      // Store parameter value
      __ ld_ptr(src, 0, tmp);
      __ add(src, BytesPerWord, src);
      __ st_ptr(tmp, dst, Interpreter::value_offset_in_bytes());
      __ deccc(cnt);
      __ br(Assembler::greater, false, Assembler::pt, loop);
      __ delayed()->sub(dst, Interpreter::stackElementSize(), dst);

      // done
      __ BIND(exit);
    }

    // setup parameters, method & call Java function
#ifdef ASSERT
    // layout_activation_impl checks its notion of saved SP against
    // this register, so if this changes update it as well.
    const Register saved_SP = Lscratch;
    __ mov(SP, saved_SP);                               // keep track of SP before call
#endif

    // setup parameters
    const Register t = G3_scratch;
    __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
    __ sll(t, Interpreter::logStackElementSize(), t);  // compute number of bytes
    __ sub(FP, t, Gargs);                              // setup parameter pointer
#ifdef _LP64
    __ add( Gargs, STACK_BIAS, Gargs );                // Account for LP64 stack bias
#endif
    __ mov(SP, O5_savedSP);


    // do the call
    //
    // the following registers must be set up:
    //
    // G2_thread
    // G5_method
    // Gargs
    BLOCK_COMMENT("call Java function");
    __ jmpl(entry_point.as_in().as_register(), G0, O7);
    __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method

    BLOCK_COMMENT("call_stub_return_address:");
    return_pc = __ pc();

    // The callee, if it wasn't interpreted, can return with SP changed, so
    // we can no longer assert that SP is unchanged across the call.

    // store result depending on type
    // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
    //  is treated as T_INT)
    { const Register addr = result     .as_in().as_register();
      const Register type = result_type.as_in().as_register();
      Label is_long, is_float, is_double, is_object, exit;
      __            cmp(type, T_OBJECT);  __ br(Assembler::equal, false, Assembler::pn, is_object);
      __ delayed()->cmp(type, T_FLOAT);   __ br(Assembler::equal, false, Assembler::pn, is_float);
      __ delayed()->cmp(type, T_DOUBLE);  __ br(Assembler::equal, false, Assembler::pn, is_double);
      __ delayed()->cmp(type, T_LONG);    __ br(Assembler::equal, false, Assembler::pn, is_long);
      __ delayed()->nop();

      // store int result
      __ st(O0, addr, G0);

      __ BIND(exit);
      __ ret();
      __ delayed()->restore();

      __ BIND(is_object);
      __ ba(false, exit);
      __ delayed()->st_ptr(O0, addr, G0);

      __ BIND(is_float);
      __ ba(false, exit);
      __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);

      __ BIND(is_double);
      __ ba(false, exit);
      __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);

      __ BIND(is_long);
#ifdef _LP64
      __ ba(false, exit);
      __ delayed()->st_long(O0, addr, G0);  // store entire long
#else
#if defined(COMPILER2)
      // All return values are where we want them, except for Longs.  C2 returns
      // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
      // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
      // build, we simply always use G1.
      // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
      // do this here. Unfortunately if we did a rethrow we'd see a MachEpilog node
      // first which would move g1 -> O0/O1 and destroy the exception we were throwing.

      __ ba(false, exit);
      __ delayed()->stx(G1, addr, G0);  // store entire long
#else
      __ st(O1, addr, BytesPerInt);
      __ ba(false, exit);
      __ delayed()->st(O0, addr, G0);
#endif /* COMPILER2 */
#endif /* _LP64 */
    }
    return start;
  }
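
  // For orientation, a hedged sketch of how the VM invokes this stub from
  // C++ (the authoritative typedef lives in stubRoutines.hpp; the parameter
  // names here are illustrative, not authoritative):
  //
  //   typedef void (*CallStub)(address link, intptr_t* result,
  //                            BasicType result_type, methodOop method,
  //                            address entry_point, intptr_t* parameters,
  //                            int parameter_size_in_words, Thread* thread);
  //   CallStub call = CAST_TO_FN_PTR(CallStub, StubRoutines::call_stub());
  //   call(wrapper_addr, result_buf, T_INT, m, entry, args, nargs, THREAD);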


  //----------------------------------------------------------------------------------------------------
  // Return point for a Java call if there's an exception thrown in Java code.
  // The exception is caught and transformed into a pending exception stored in
  // JavaThread that can be tested from within the VM.
  //
  // Oexception: exception oop

  address generate_catch_exception() {
    StubCodeMark mark(this, "StubRoutines", "catch_exception");

    address start = __ pc();
    // verify that thread corresponds
    __ verify_thread();

    const Register& temp_reg = Gtemp;
    Address pending_exception_addr    (G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
    Address exception_file_offset_addr(G2_thread, 0, in_bytes(Thread::exception_file_offset   ()));
    Address exception_line_offset_addr(G2_thread, 0, in_bytes(Thread::exception_line_offset   ()));

    // set pending exception
    __ verify_oop(Oexception);
    __ st_ptr(Oexception, pending_exception_addr);
    __ set((intptr_t)__FILE__, temp_reg);
    __ st_ptr(temp_reg, exception_file_offset_addr);
    __ set((intptr_t)__LINE__, temp_reg);
    __ st(temp_reg, exception_line_offset_addr);

    // complete return to VM
    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");

    Address stub_ret(temp_reg, StubRoutines::_call_stub_return_address);
    __ jump_to(stub_ret);
    __ delayed()->nop();

    return start;
  }


  //----------------------------------------------------------------------------------------------------
  // Continuation point for runtime calls returning with a pending exception.
  // The pending exception check happened in the runtime or native call stub.
  // The pending exception in Thread is converted into a Java-level exception.
  //
  // Contract with Java-level exception handler: O0 = exception
  //                                             O1 = throwing pc

  address generate_forward_exception() {
    StubCodeMark mark(this, "StubRoutines", "forward_exception");
    address start = __ pc();

    // Upon entry, O7 has the return address returning into Java
    // (interpreted or compiled) code; i.e. the return address
    // becomes the throwing pc.

    const Register& handler_reg = Gtemp;

    Address exception_addr (G2_thread, 0, in_bytes(Thread::pending_exception_offset()));

#ifdef ASSERT
    // make sure that this code is only executed if there is a pending exception
    { Label L;
      __ ld_ptr(exception_addr, Gtemp);
      __ br_notnull(Gtemp, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }
#endif

    // compute exception handler into handler_reg
    __ get_thread();
    __ ld_ptr(exception_addr, Oexception);
    __ verify_oop(Oexception);
    __ save_frame(0);             // compensates for compiler weakness
    __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
    BLOCK_COMMENT("call exception_handler_for_return_address");
    __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), Lscratch);
    __ mov(O0, handler_reg);
    __ restore();                 // compensates for compiler weakness

    __ ld_ptr(exception_addr, Oexception);
    __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC

#ifdef ASSERT
    // make sure exception is set
    { Label L;
      __ br_notnull(Oexception, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // jump to exception handler
    __ jmp(handler_reg, 0);
    // clear pending exception
    __ delayed()->st_ptr(G0, exception_addr);

    return start;
  }


  //------------------------------------------------------------------------------------------------------------------------
  // Continuation point for throwing of implicit exceptions that are not handled in
  // the current activation. Fabricates an exception oop and initiates normal
  // exception dispatching in this frame. Only callee-saved registers are preserved
  // (through the normal register window / RegisterMap handling).
  // If the compiler needs all registers to be preserved between the fault
  // point and the exception handler then it must assume responsibility for that in
  // AbstractCompiler::continuation_for_implicit_null_exception or
  // continuation_for_implicit_division_by_zero_exception. All other implicit
  // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
  // either at call sites or otherwise assume that stack unwinding will be initiated,
  // so caller saved registers were assumed volatile in the compiler.

  // Note that we generate only this stub into a RuntimeStub, because it needs to be
  // properly traversed and ignored during GC, so we change the meaning of the "__"
  // macro within this method.
#undef __
#define __ masm->

  address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) {
#ifdef ASSERT
    int insts_size = VerifyThread ? 1 * K : 600;
#else
    int insts_size = VerifyThread ? 1 * K : 256;
#endif /* ASSERT */
    int locs_size  = 32;

    CodeBuffer      code(name, insts_size, locs_size);
    MacroAssembler* masm = new MacroAssembler(&code);

    __ verify_thread();

    // This is an inlined and slightly modified version of call_VM
    // which has the ability to fetch the return PC out of thread-local storage
    __ assert_not_delayed();

    // Note that we always push a frame because on the SPARC
    // architecture, for all of our implicit exception kinds at call
    // sites, the implicit exception is taken before the callee frame
    // is pushed.
    __ save_frame(0);

    int frame_complete = __ offset();

    if (restore_saved_exception_pc) {
      Address saved_exception_pc(G2_thread, 0, in_bytes(JavaThread::saved_exception_pc_offset()));
      __ ld_ptr(saved_exception_pc, I7);
      __ sub(I7, frame::pc_return_offset, I7);
    }

    // Note that we always have a runtime stub frame on the top of stack by this point
    Register last_java_sp = SP;
    // 64-bit last_java_sp is biased!
    __ set_last_Java_frame(last_java_sp, G0);
    if (VerifyThread)  __ mov(G2_thread, O0); // about to be smashed; pass early
    __ save_thread(noreg);
    // do the call
    BLOCK_COMMENT("call runtime_entry");
    __ call(runtime_entry, relocInfo::runtime_call_type);
    if (!VerifyThread)
      __ delayed()->mov(G2_thread, O0);  // pass thread as first argument
    else
      __ delayed()->nop();               // (thread already passed)
    __ restore_thread(noreg);
    __ reset_last_Java_frame();

    // check for pending exceptions. use Gtemp as scratch register.
#ifdef ASSERT
    Label L;

    Address exception_addr(G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
    Register scratch_reg = Gtemp;
    __ ld_ptr(exception_addr, scratch_reg);
    __ br_notnull(scratch_reg, false, Assembler::pt, L);
    __ delayed()->nop();
    __ should_not_reach_here();
    __ bind(L);
#endif // ASSERT
    BLOCK_COMMENT("call forward_exception_entry");
    __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
    // we use O7 linkage so that forward_exception_entry has the issuing PC
    __ delayed()->restore();

    RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
    return stub->entry_point();
  }

#undef __
#define __ _masm->


  // Generate a routine that sets all the registers so we
  // can tell if the stop routine prints them correctly.
  address generate_test_stop() {
    StubCodeMark mark(this, "StubRoutines", "test_stop");
    address start = __ pc();

    int i;

    __ save_frame(0);

    static jfloat zero = 0.0, one = 1.0;

    // put addr in L0, then load through L0 to F0
    __ set((intptr_t)&zero, L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F0);
    __ set((intptr_t)&one,  L0);  __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1

    // use add to put 2..18 in F2..F18
    for ( i = 2;  i <= 18;  ++i ) {
      __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i));
    }

    // Now put double 2 in F16, double 18 in F18
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2,  F16 );
    __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );

    // use add to put 20..32 in F20..F32
    for (i = 20; i < 32; i += 2) {
      __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i));
    }

    // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
    for ( i = 0; i < 8; ++i ) {
      if (i < 6) {
        __ set(     i, as_iRegister(i));
        __ set(16 + i, as_oRegister(i));
        __ set(24 + i, as_gRegister(i));
      }
      __ set( 8 + i, as_lRegister(i));
    }

    __ stop("testing stop");


    __ ret();
    __ delayed()->restore();

    return start;
  }


  address generate_stop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
    address start = __ pc();

    __ stop_subroutine();

    return start;
  }

  address generate_flush_callers_register_windows() {
    StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
    address start = __ pc();

    __ flush_windows();
    __ retl(false);
    __ delayed()->add( FP, STACK_BIAS, O0 );
    // The returned value must be a stack pointer whose register save area
    // is flushed, and will stay flushed while the caller executes.

    return start;
  }

  // Helper functions for v8 atomic operations.
  //
  void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
    if (mark_oop_reg == noreg) {
      address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
    } else {
      assert(scratch_reg != noreg, "just checking");
      address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
      __ set((intptr_t)lock_ptr, lock_ptr_reg);
      __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
      __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
    }
  }

  void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {

    get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
    __ set(StubRoutines::Sparc::locked, lock_reg);
    // Initialize yield counter
    __ mov(G0,yield_reg);

    __ BIND(retry);
    __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
    __ br(Assembler::less, false, Assembler::pt, dontyield);
    __ delayed()->nop();

    // This code can only be called from inside the VM; this
    // stub is only invoked from Atomic::add().  We do not
    // want to use call_VM, because _last_java_sp and such
    // must already be set.
    //
    // Save the regs and make space for a C call
    __ save(SP, -96, SP);
    __ save_all_globals_into_locals();
    BLOCK_COMMENT("call os::naked_sleep");
    __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
    __ delayed()->nop();
    __ restore_globals_from_locals();
    __ restore();
    // reset the counter
    __ mov(G0,yield_reg);

    __ BIND(dontyield);

    // try to get lock
    __ swap(lock_ptr_reg, 0, lock_reg);

    // did we get the lock?
    __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
    __ br(Assembler::notEqual, true, Assembler::pn, retry);
    __ delayed()->add(yield_reg,1,yield_reg);

    // yes, got lock. do the operation here.
  }

  void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
    __ st(lock_reg, lock_ptr_reg, 0); // unlock
  }
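
  // The prologue/epilogue pair above implements a simple test-and-set
  // spinlock with sleep-based backoff. A hedged C-level sketch of the same
  // protocol (illustrative only; 'swap_atomically' stands in for the SWAP
  // instruction):
  //
  //   int yields = 0;
  //   for (;;) {
  //     if (yields >= V8AtomicOperationUnderLockSpinCount) {
  //       os::naked_sleep();              // back off before retrying
  //       yields = 0;
  //     }
  //     int old = swap_atomically(lock_ptr, locked);
  //     if (old == unlocked) break;       // lock acquired
  //     yields++;
  //   }
  //   /* ... critical section ... */
  //   *lock_ptr = unlocked;               // epilogue: the store releases the lock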

  // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
  //
  // Arguments:
  //
  //      exchange_value: O0
  //      dest:           O1
  //
  // Results:
  //
  //      O0: the value previously stored in dest
  //
  address generate_atomic_xchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
    address start = __ pc();

    if (UseCASForSwap) {
      // Use CAS instead of swap, just in case the MP hardware
      // prefers to work with just one kind of synch. instruction.
      Label retry;
      __ BIND(retry);
      __ mov(O0, O3);       // scratch copy of exchange value
      __ ld(O1, 0, O2);     // observe the previous value
      // try to replace O2 with O3
      __ cas_under_lock(O1, O2, O3,
          (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
      __ cmp(O2, O3);
      __ br(Assembler::notEqual, false, Assembler::pn, retry);
      __ delayed()->nop();

      __ retl(false);
      __ delayed()->mov(O2, O0);  // report previous value to caller

    } else {
      if (VM_Version::v9_instructions_work()) {
        __ retl(false);
        __ delayed()->swap(O1, 0, O0);
      } else {
        const Register& lock_reg     = O2;
        const Register& lock_ptr_reg = O3;
        const Register& yield_reg    = O4;

        Label retry;
        Label dontyield;

        generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        // got the lock, do the swap
        __ swap(O1, 0, O0);

        generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
        __ retl(false);
        __ delayed()->nop();
      }
    }

    return start;
  }
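
  // The CAS path above is the standard compare-and-swap emulation of an
  // unconditional exchange. A hedged sketch of its logic in plain C++ (the
  // 'cas' call stands in for cas_under_lock / the CASA instruction and
  // returns the value observed in memory):
  //
  //   jint atomic_xchg_sketch(jint exchange_value, volatile jint* dest) {
  //     jint observed;
  //     do {
  //       observed = *dest;                         // read current value
  //     } while (cas(dest, observed, exchange_value) != observed);
  //     return observed;                            // previous value
  //   }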


  // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
  //
  // Arguments:
  //
  //      exchange_value: O0
  //      dest:           O1
  //      compare_value:  O2
  //
  // Results:
  //
  //      O0: the value previously stored in dest
  //
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_cmpxchg() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
    address start = __ pc();

    // cmpxchg(dest, compare_value, exchange_value)
    __ cas_under_lock(O1, O2, O0,
        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
    __ retl(false);
    __ delayed()->nop();

    return start;
  }

  // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
  //
  // Arguments:
  //
  //      exchange_value: O1:O0
  //      dest:           O2
  //      compare_value:  O4:O3
  //
  // Results:
  //
  //      O1:O0: the value previously stored in dest
  //
  // This only works on V9; on V8 we don't generate any
  // code and just return NULL.
  //
  // Overwrites: G1,G2,G3
  //
  address generate_atomic_cmpxchg_long() {
    StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
    address start = __ pc();

    if (!VM_Version::supports_cx8())
      return NULL;
    __ sllx(O0, 32, O0);
    __ srl(O1, 0, O1);
    __ or3(O0,O1,O0);     // O0 holds 64-bit value from exchange_value
    __ sllx(O3, 32, O3);
    __ srl(O4, 0, O4);
    __ or3(O3,O4,O3);     // O3 holds 64-bit value from compare_value
    __ casx(O2, O3, O0);
    __ srl(O0, 0, O1);    // unpacked return value in O1:O0
    __ retl(false);
    __ delayed()->srlx(O0, 32, O0);

    return start;
  }
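
  // The packing above merges a 32-bit register pair into one 64-bit register
  // so a single casx can compare and exchange the whole jlong. A hedged C++
  // sketch of the packing/unpacking arithmetic (illustrative only):
  //
  //   jlong pack(jint hi, jint lo) {
  //     return ((jlong)hi << 32) | ((jlong)lo & 0xFFFFFFFFLL);  // sllx/srl/or3
  //   }
  //   void unpack(jlong v, jint* hi, jint* lo) {
  //     *hi = (jint)(v >> 32);            // srlx(O0, 32, O0)
  //     *lo = (jint)(v & 0xFFFFFFFFLL);   // srl(O0, 0, O1)
  //   }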


  // Support for jint Atomic::add(jint add_value, volatile jint* dest).
  //
  // Arguments:
  //
  //      add_value: O0   (e.g., +1 or -1)
  //      dest:      O1
  //
  // Results:
  //
  //      O0: the new value stored in dest
  //
  // Overwrites (v9): O3
  // Overwrites (v8): O3,O4,O5
  //
  address generate_atomic_add() {
    StubCodeMark mark(this, "StubRoutines", "atomic_add");
    address start = __ pc();
    __ BIND(_atomic_add_stub);

    if (VM_Version::v9_instructions_work()) {
      Label retry;
      __ BIND(retry);

      __ lduw(O1, 0, O2);
      __ add(O0, O2, O3);
      __ cas(O1, O2, O3);
      __ cmp( O2, O3);
      __ br(Assembler::notEqual, false, Assembler::pn, retry);
      __ delayed()->nop();
      __ retl(false);
      __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
    } else {
      const Register& lock_reg     = O2;
      const Register& lock_ptr_reg = O3;
      const Register& value_reg    = O4;
      const Register& yield_reg    = O5;

      Label retry;
      Label dontyield;

      generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
      // got lock, do the increment
      __ ld(O1, 0, value_reg);
      __ add(O0, value_reg, value_reg);
      __ st(value_reg, O1, 0);

      // %%% only for RMO and PSO
      __ membar(Assembler::StoreStore);

      generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);

      __ retl(false);
      __ delayed()->mov(value_reg, O0);
    }

    return start;
  }
  Label _atomic_add_stub;  // called from other stubs
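
  // The V9 path above is the classic fetch-and-add built from CAS. A hedged
  // C++ sketch of the loop (the 'cas' call stands in for the CASA
  // instruction, returning the value observed in memory):
  //
  //   jint atomic_add_sketch(jint add_value, volatile jint* dest) {
  //     jint old, updated;
  //     do {
  //       old     = *dest;              // lduw
  //       updated = old + add_value;    // add
  //     } while (cas(dest, old, updated) != old);
  //     return old + add_value;         // the new value stored in dest
  //   }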


  // Support for void OrderAccess::fence().
  //
  address generate_fence() {
    StubCodeMark mark(this, "StubRoutines", "fence");
    address start = __ pc();

    __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad  | Assembler::LoadStore |
                                          Assembler::StoreLoad | Assembler::StoreStore));
    __ retl(false);
    __ delayed()->nop();

    return start;
  }
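
  // The membar above combines all four ordering bits, making this a full
  // two-way fence: no load or store may be reordered across it in either
  // direction. In SPARC assembly terms the stub is roughly (illustrative):
  //
  //   membar  #LoadLoad | #LoadStore | #StoreLoad | #StoreStore
  //   retl
  //   nop
  //
  // Under TSO only the #StoreLoad ordering actually costs anything; the
  // other three orderings already hold on such hardware.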


  //------------------------------------------------------------------------------------------------------------------------
  // The following routine generates a subroutine to throw an asynchronous
  // UnknownError when an unsafe access gets a fault that could not be
  // reasonably prevented by the programmer.  (Example: SIGBUS/OBJERR.)
  //
  // Arguments:
  //
  //      trapping PC: O7
  //
  // Results:
  //      posts an asynchronous exception, skips the trapping instruction
  //

  address generate_handler_for_unsafe_access() {
    StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
    address start = __ pc();

    const int preserve_register_words = (64 * 2);
    Address preserve_addr(FP, 0, (-preserve_register_words * wordSize) + STACK_BIAS);

    Register Lthread = L7_thread_cache;
    int i;

    __ save_frame(0);
    __ mov(G1, L1);
    __ mov(G2, L2);
    __ mov(G3, L3);
    __ mov(G4, L4);
    __ mov(G5, L5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
    }

    address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
    BLOCK_COMMENT("call handle_unsafe_access");
    __ call(entry_point, relocInfo::runtime_call_type);
    __ delayed()->nop();

    __ mov(L1, G1);
    __ mov(L2, G2);
    __ mov(L3, G3);
    __ mov(L4, G4);
    __ mov(L5, G5);
    for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
      __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
    }

    __ verify_thread();

    __ jmp(O0, 0);
    __ delayed()->restore();

    return start;
  }


  // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
  // Arguments:
  //
  //      ret    : O0, returned
  //      icc/xcc: set as O0 (depending on wordSize)
  //      sub    : O1, argument, not changed
  //      super  : O2, argument, not changed
  //      raddr  : O7, blown by call
  address generate_partial_subtype_check() {
    StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    address start = __ pc();
    Label loop, miss;

    // Compare super with sub directly, since super is not in its own SSA.
    // The compiler used to emit this test, but we fold it in here,
    // to increase overall code density, with no real loss of speed.
    { Label L;
      __ cmp(O1, O2);
      __ brx(Assembler::notEqual, false, Assembler::pt, L);
      __ delayed()->nop();
      __ retl();
      __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
      __ bind(L);
    }

#if defined(COMPILER2) && !defined(_LP64)
    // Do not use a 'save' because it blows the 64-bit O registers.
    __ add(SP,-4*wordSize,SP);  // Make space for 4 temps
    __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
    __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
    __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
    __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
    Register Rret   = O0;
    Register Rsub   = O1;
    Register Rsuper = O2;
#else
    __ save_frame(0);
    Register Rret   = I0;
    Register Rsub   = I1;
    Register Rsuper = I2;
#endif

    Register L0_ary_len = L0;
    Register L1_ary_ptr = L1;
    Register L2_super   = L2;
    Register L3_index   = L3;

    inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);

    __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
    __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
    __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
    __ clr(L3_index);               // zero index
    // Load a little early; will load 1 off the end of the array.
    // Ok for now; revisit if we have other uses of this routine.
    __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
    __ align(CodeEntryAlignment);

    // The scan loop
    __ BIND(loop);
    __ add(L1_ary_ptr,wordSize,L1_ary_ptr); // Bump by OOP size
    __ cmp(L3_index,L0_ary_len);
    __ br(Assembler::equal,false,Assembler::pn,miss);
    __ delayed()->inc(L3_index);    // Bump index
    __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit
    __ brx( Assembler::notEqual, false, Assembler::pt, loop );
    __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super); // Will load a little early

    // Got a hit; report success; set cache.  Cache load doesn't
    // happen here; for speed it is directly emitted by the compiler.
    __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
    __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
    __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
    __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
    __ retl();                  // Result in Rret is zero; flags set to Z
    __ delayed()->add(SP,4*wordSize,SP);
#else
    __ ret();                   // Result in Rret is zero; flags set to Z
    __ delayed()->restore();
#endif

    // Hit or miss falls through here
    __ BIND(miss);
    __ addcc(G0,1,Rret);        // set NZ flags, NZ result

#if defined(COMPILER2) && !defined(_LP64)
    __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
    __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
    __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
    __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
    __ retl();                  // Result in Rret is != 0; flags set to NZ
    __ delayed()->add(SP,4*wordSize,SP);
#else
    __ ret();                   // Result in Rret is != 0; flags set to NZ
    __ delayed()->restore();
#endif

    return start;
  }
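
  // The scan loop above is a linear search of the secondary-supers array,
  // caching a hit for next time. A hedged C++ sketch of the same logic
  // (accessor names abbreviated; not the emitted code):
  //
  //   bool partial_subtype_check_sketch(Klass* sub, Klass* super) {
  //     objArrayOop supers = sub->secondary_supers();
  //     int len = supers->length();
  //     for (int i = 0; i < len; i++) {
  //       if (supers->obj_at(i) == (oop)super) {
  //         sub->set_secondary_super_cache(super); // remember the hit
  //         return true;    // stub returns 0 with Z set
  //       }
  //     }
  //     return false;       // stub returns non-zero with NZ set
  //   }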


  // Called from MacroAssembler::verify_oop
  //
  address generate_verify_oop_subroutine() {
    StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = __ pc();

    __ verify_oop_subroutine();

    return start;
  }

  static address disjoint_byte_copy_entry;
  static address disjoint_short_copy_entry;
  static address disjoint_int_copy_entry;
  static address disjoint_long_copy_entry;
  static address disjoint_oop_copy_entry;

  static address byte_copy_entry;
  static address short_copy_entry;
  static address int_copy_entry;
  static address long_copy_entry;
  static address oop_copy_entry;

  static address checkcast_copy_entry;

  //
  // Verify that a register contains a clean 32-bit positive value
  // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
  //
  //  Input:
  //    Rint  -  32-bit value
  //    Rtmp  -  scratch
  //
  void assert_clean_int(Register Rint, Register Rtmp) {
#if defined(ASSERT) && defined(_LP64)
    __ signx(Rint, Rtmp);
    __ cmp(Rint, Rtmp);
    __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
#endif
  }
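
  // assert_clean_int traps when the register is not the sign-extension of
  // its low 32 bits; for the non-negative counts passed here that means the
  // high 32 bits must be zero. A hedged C++ equivalent of the check
  // (illustrative only):
  //
  //   void assert_clean_int_sketch(jlong r) {
  //     assert((jlong)(jint)r == r, "register not a clean 32-bit value");
  //   }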

  //
  //  Generate overlap test for array copy stubs
  //
  //  Input:
  //    O0    -  array1
  //    O1    -  array2
  //    O2    -  element count
  //
  //  Kills temps:  O3, O4
  //
  void array_overlap_test(address no_overlap_target, int log2_elem_size) {
    assert(no_overlap_target != NULL, "must be generated");
    array_overlap_test(no_overlap_target, NULL, log2_elem_size);
  }
  void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
    array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
  }
  void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
    const Register from       = O0;
    const Register to         = O1;
    const Register count      = O2;
    const Register to_from    = O3; // to - from
    const Register byte_count = O4; // count << log2_elem_size

    __ subcc(to, from, to_from);
    __ sll_ptr(count, log2_elem_size, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
    __ delayed()->cmp(to_from, byte_count);
    if (NOLp == NULL)
      __ brx(Assembler::greaterEqual, false, Assembler::pt, no_overlap_target);
    else
      __ brx(Assembler::greaterEqual, false, Assembler::pt, (*NOLp));
    __ delayed()->nop();
  }
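
  // The two branches above encode the disjointness condition: a forward copy
  // is safe when 'to' is at or below 'from', or when the gap between them is
  // at least the copied byte count. A hedged C++ sketch of the test
  // (illustrative only; the stub's flag handling is more subtle):
  //
  //   bool no_overlap_sketch(char* from, char* to, size_t count, int log2_sz) {
  //     ptrdiff_t to_from    = to - from;
  //     size_t    byte_count = count << log2_sz;
  //     return (uintptr_t)to <= (uintptr_t)from      // copy-up can't clobber
  //         || to_from >= (ptrdiff_t)byte_count;     // regions are disjoint
  //   }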

  //
  //  Generate pre-write barrier for array.
  //
  //  Input:
  //     addr  - register containing starting address
  //     count - register containing element count
  //     tmp   - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_pre_barrier(Register addr, Register count) {
#if 0 // G1 only
    BarrierSet* bs = Universe::heap()->barrier_set();
    if (bs->has_write_ref_pre_barrier()) {
      assert(bs->has_write_ref_array_pre_opt(),
             "Else unsupported barrier set.");

      assert(addr->is_global() && count->is_global(),
             "If not, then we have to fix this code to handle more "
             "general cases.");
      // Get some new fresh output registers.
      __ save_frame(0);
      // Save the necessary global regs... will be used after.
      __ mov(addr, L0);
      __ mov(count, L1);

      __ mov(addr, O0);
      // Get the count into O1
      __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
      __ delayed()->mov(count, O1);
      __ mov(L0, addr);
      __ mov(L1, count);
      __ restore();
    }
#endif // 0
  }

  //
  //  Generate post-write barrier for array.
  //
  //  Input:
  //     addr  - register containing starting address
  //     count - register containing element count
  //     tmp   - scratch register
  //
  //  The input registers are overwritten.
  //
  void gen_write_ref_array_post_barrier(Register addr, Register count,
                                        Register tmp) {
    BarrierSet* bs = Universe::heap()->barrier_set();

    switch (bs->kind()) {
#if 0 // G1 - only
      case BarrierSet::G1SATBCT:
      case BarrierSet::G1SATBCTLogging:
        {
          assert(addr->is_global() && count->is_global(),
                 "If not, then we have to fix this code to handle more "
                 "general cases.");
          // Get some new fresh output registers.
          __ save_frame(0);
          __ mov(addr, O0);
          __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
          __ delayed()->mov(count, O1);
          __ restore();
        }
        break;
#endif // 0 G1 - only
      case BarrierSet::CardTableModRef:
      case BarrierSet::CardTableExtension:
        {
          CardTableModRefBS* ct = (CardTableModRefBS*)bs;
          assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
          assert_different_registers(addr, count, tmp);

          Label L_loop;

          __ sll_ptr(count, LogBytesPerOop, count);
          __ sub(count, BytesPerOop, count);
          __ add(count, addr, count);
          // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
          __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
          __ srl_ptr(count, CardTableModRefBS::card_shift, count);
          __ sub(count, addr, count);
          Address rs(tmp, (address)ct->byte_map_base);
          __ load_address(rs);
          __ BIND(L_loop);
          __ stb(G0, rs.base(), addr);
          __ subcc(count, 1, count);
          __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
          __ delayed()->add(addr, 1, addr);

        }
        break;
      case BarrierSet::ModRef:
        break;
      default:
        ShouldNotReachHere();

    }

  }
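
  // The card-table case dirties one byte per card spanned by
  // [addr, addr + count*BytesPerOop). A hedged C++ sketch of the loop (the
  // "dirty" value is 0 here, matching the stb of G0 above; illustrative
  // only):
  //
  //   void post_barrier_sketch(oop* addr, size_t count, jbyte* byte_map_base) {
  //     uintptr_t first = (uintptr_t)addr >> CardTableModRefBS::card_shift;
  //     uintptr_t last  = ((uintptr_t)(addr + count) - 1)
  //                       >> CardTableModRefBS::card_shift;
  //     for (uintptr_t card = first; card <= last; card++)
  //       byte_map_base[card] = 0;   // mark card dirty
  //   }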


  // Copy big chunks forward with shift
  //
  // Inputs:
  //   from      - source array address
  //   to        - destination array address, aligned to 8 bytes
  //   count     - element count; must be >= the number of elements in 16 bytes
  //   count_dec - element-count decrement corresponding to 16 bytes
  //   L_copy_bytes - copy exit label
  //
  void copy_16_bytes_forward_with_shift(Register from, Register to,
                     Register count, int count_dec, Label& L_copy_bytes) {
    Label L_loop, L_aligned_copy, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do 8-byte aligned copy
    __ andcc(from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->nop();

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ deccc(count, count_dec); // Pre-decrement 'count'
    __ andn(from, 7, from);     // Align address
    __ ldx(from, 0, O3);
    __ inc(from, 8);
    __ align(16);
    __ BIND(L_loop);
    __ ldx(from, 0, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(from, 8, G4);
    __ inc(to, 16);
    __ inc(from, 16);
    __ sllx(O3, left_shift,  O3);
    __ srlx(O4, right_shift, G3);
    __ bset(G3, O3);
    __ stx(O3, to, -16);
    __ sllx(O4, left_shift,  O4);
    __ srlx(G4, right_shift, G3);
    __ bset(G3, O4);
    __ stx(O4, to, -8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1 ); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(from, 0, O4);
    __ inc(to, 8);
    __ inc(from, 8);
    __ sllx(O3, left_shift,  O3);
    __ srlx(O4, right_shift, G3);
    __ bset(O3, G3);
    __ stx(G3, to, -8);

    __ BIND(L_copy_last_bytes);
    __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->sub(from, right_shift, from);       // restore address

    __ BIND(L_aligned_copy);
  }
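
  // Each iteration above merges two misaligned 8-byte source words into one
  // aligned destination word with a shift-and-or. A hedged C++ sketch of the
  // merge step (illustrative only; 'ls'/'rs' are the bit shift counts, with
  // ls + rs == 64):
  //
  //   uint64_t merge_sketch(uint64_t prev, uint64_t next, int ls, int rs) {
  //     return (prev << ls) | (next >> rs);   // sllx / srlx / bset
  //   }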

  // Copy big chunks backward with shift
  //
  // Inputs:
  //   end_from  - source array end address
  //   end_to    - destination array end address, aligned to 8 bytes
  //   count     - element count; must be >= the number of elements in 16 bytes
  //   count_dec - element-count decrement corresponding to 16 bytes
  //   L_aligned_copy - aligned copy exit label
  //   L_copy_bytes   - copy exit label
  //
  void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
                     Register count, int count_dec,
                     Label& L_aligned_copy, Label& L_copy_bytes) {
    Label L_loop, L_copy_last_bytes;

    // if both arrays have the same alignment mod 8, do 8-byte aligned copy
    __ andcc(end_from, 7, G1); // misaligned bytes
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'

    const Register left_shift  = G1; // left  shift bit counter
    const Register right_shift = G5; // right shift bit counter

    __ sll(G1, LogBitsPerByte, left_shift);
    __ mov(64, right_shift);
    __ sub(right_shift, left_shift, right_shift);

    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ andn(end_from, 7, end_from); // Align address
    __ ldx(end_from, 0, O3);
    __ align(16);
    __ BIND(L_loop);
    __ ldx(end_from, -8, O4);
    __ deccc(count, count_dec); // Can we do next iteration after this one?
    __ ldx(end_from, -16, G4);
    __ dec(end_to, 16);
    __ dec(end_from, 16);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, right_shift, O4);
    __ sllx(G4, left_shift,  G3);
    __ bset(G3, O4);
    __ stx(O4, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
    __ delayed()->mov(G4, O3);

    __ inccc(count, count_dec>>1 ); // + 8 bytes
    __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
    __ delayed()->inc(count, count_dec>>1); // restore 'count'

    // copy 8 bytes, part of them already loaded in O3
    __ ldx(end_from, -8, O4);
    __ dec(end_to, 8);
    __ dec(end_from, 8);
    __ srlx(O3, right_shift, O3);
    __ sllx(O4, left_shift,  G3);
    __ bset(O3, G3);
    __ stx(G3, end_to, 0);

    __ BIND(L_copy_last_bytes);
    __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
    __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
    __ delayed()->add(end_from, left_shift, end_from); // restore address
  }

  //
  //  Generate stub for disjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_byte_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_align;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from   = O0;   // source array address
    const Register to     = O1;   // destination array address
    const Register count  = O2;   // elements count
    const Register offset = O5;   // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3);  // Make sure 'count' is clean int.

    if (!aligned)  disjoint_byte_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4-byte alignment in the 32-bit VM
      // and 8-byte alignment in the 64-bit VM, so we do this only in the 32-bit VM.
      //
#ifndef _LP64
      // copy a 4-byte word if necessary to align 'to' to 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 4);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy bytes to align 'to' on 8 byte boundary
      __ andcc(to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->neg(G1);
      __ inc(G1, 8);       // bytes need to copy to next 8-bytes alignment
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ ldub(from, 0, O3);
      __ deccc(G1);
      __ inc(from);
      __ stb(O3, to, 0);
      __ br(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->inc(to);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
    }

    // Both arrays are 8-byte aligned; copy 16 bytes at a time
    __ and3(count, 7, G4); // Save count
    __ srl(count, 3, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore count

    // copy trailing bytes
    __ BIND(L_copy_byte);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(16);
    __ BIND(L_copy_byte_loop);
    __ ldub(from, offset, O3);
    __ deccc(count);
    __ stb(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->inc(offset);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for conjoint byte copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_byte_copy(bool aligned, const char * name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
        disjoint_byte_copy_entry;

    Label L_skip_alignment, L_align, L_aligned_copy;
    Label L_copy_byte, L_copy_byte_loop, L_exit;

    const Register from     = O0;   // source array address
    const Register to       = O1;   // destination array address
    const Register count    = O2;   // elements count
    const Register end_from = from; // source array end address
    const Register end_to   = to;   // destination array end address

    assert_clean_int(count, O3);    // Make sure 'count' is clean int.

    if (!aligned)  byte_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 0);

    __ add(to, count, end_to);      // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 23); // 16 + 7
    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
    __ delayed()->add(from, count, end_from);

    {
      // Align the ends of the arrays, since they may not be aligned even
      // when the arrays themselves are.

      // copy bytes to align 'end_to' on 8 byte boundary
      __ andcc(end_to, 7, G1); // misaligned bytes
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->nop();
      __ sub(count, G1, count);
      __ BIND(L_align);
      __ dec(end_from);
      __ dec(end_to);
      __ ldub(end_from, 0, O3);
      __ deccc(G1);
      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
      __ delayed()->stb(O3, end_to, 0);
      __ BIND(L_skip_alignment);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8 bytes in the 64-bit VM.
      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
      // only in the unaligned case, so decrement it here.
      __ dec(count, 16);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise jump to the next
      // code for aligned copy (subtracting 16 from 'count' before the jump).
      // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
                                        L_aligned_copy, L_copy_byte);
    }
    // copy 16 elements (16 bytes) at a time
    __ align(16);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 16);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 16);

    // copy 1 element (1 byte) at a time
    __ BIND(L_copy_byte);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(16);
    __ BIND(L_copy_byte_loop);
    __ dec(end_from);
    __ dec(end_to);
    __ ldub(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
    __ delayed()->stb(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }


  //
  //  Generate stub for disjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_short_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Label L_skip_alignment, L_skip_alignment2;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from   = O0; // source array address
    const Register to     = O1; // destination array address
    const Register count  = O2; // elements count
    const Register offset = O5; // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    assert_clean_int(count, O3); // Make sure 'count' is clean int.

    if (!aligned)  disjoint_short_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->mov(G0, offset);

    if (aligned) {
      // 'aligned' == true when it is known statically during compilation
      // of this arraycopy call site that both 'from' and 'to' addresses
      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
      //
      // Aligned arrays have 4-byte alignment in a 32-bit VM
      // and 8-byte alignment in a 64-bit VM.
      //
#ifndef _LP64
      // copy one 2-element word if necessary to align 'to' on 8 bytes
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count, 2);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);
#endif
    } else {
      // copy 1 element if necessary to align 'to' on a 4-byte boundary
      __ andcc(to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(from, 0, O3);
      __ inc(from, 2);
      __ inc(to, 2);
      __ dec(count);
      __ sth(O3, to, -2);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'to' on an 8 byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(from, 0, O3);
      __ dec(count, 2);
      __ lduh(from, 2, O4);
      __ inc(from, 4);
      __ inc(to, 4);
      __ sth(O3, to, -4);
      __ sth(O4, to, -2);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (!aligned)
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise fall through to the next
      // code for aligned copy.
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
    }

    // Both arrays are 8-byte aligned; copy 16 bytes at a time
    __ and3(count, 3, G4); // Save count
    __ srl(count, 2, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore count

    // copy 1 element at a time
    __ BIND(L_copy_2_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ align(16);
    __ BIND(L_copy_2_bytes_loop);
    __ lduh(from, offset, O3);
    __ deccc(count);
    __ sth(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
    __ delayed()->inc(offset, 2);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate stub for conjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_short_copy(bool aligned, const char * name) {
    // Do reverse copy.

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jshort_disjoint_arraycopy() :
        disjoint_short_copy_entry;

    Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
    Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;

    const Register from     = O0;   // source array address
    const Register to       = O1;   // destination array address
    const Register count    = O2;   // elements count
    const Register end_from = from; // source array end address
    const Register end_to   = to;   // destination array end address

    const Register byte_count = O3; // bytes count to copy

    assert_clean_int(count, O3);    // Make sure 'count' is clean int.

    if (!aligned)  short_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 1);

    __ sllx(count, LogBytesPerShort, byte_count);
    __ add(to, byte_count, end_to); // offset after last copied element

    // for short arrays, just do single element copy
    __ cmp(count, 11); // 8 + 3  (22 bytes)
    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
    __ delayed()->add(from, byte_count, end_from);

    {
      // Align end of arrays since they could be not aligned even
      // when the arrays themselves are aligned.

      // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
      __ andcc(end_to, 3, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(end_from, 2);
      __ dec(end_to, 2);
      __ dec(count);
      __ sth(O3, end_to, 0);
      __ BIND(L_skip_alignment);

      // copy 2 elements to align 'end_to' on an 8 byte boundary
      __ andcc(end_to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
      __ delayed()->lduh(end_from, -2, O3);
      __ dec(count, 2);
      __ lduh(end_from, -4, O4);
      __ dec(end_from, 4);
      __ dec(end_to, 4);
      __ sth(O3, end_to, 2);
      __ sth(O4, end_to, 0);
      __ BIND(L_skip_alignment2);
    }
#ifdef _LP64
    if (aligned) {
      // Both arrays are aligned to 8 bytes in 64-bit VM.
      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
      // only in the unaligned case.
      __ dec(count, 8);
    } else
#endif
    {
      // Copy with shift 16 bytes per iteration if arrays do not have
      // the same alignment mod 8, otherwise jump to the next
      // code for aligned copy (and subtracting 8 from 'count' before the jump).
      // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
      // Also jump over aligned copy after the copy with shift completed.

      copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
                                        L_aligned_copy, L_copy_2_bytes);
    }
    // copy 8 elements (16 bytes) at a time
    __ align(16);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 8);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 8);

    // copy 1 element (2 bytes) at a time
    __ BIND(L_copy_2_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_2_bytes_loop);
    __ dec(end_from, 2);
    __ dec(end_to, 2);
    __ lduh(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
    __ delayed()->sth(O4, end_to, 0);

    __ BIND(L_exit);
    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate core code for disjoint int copy (and oop copy on 32-bit).
  //  If "aligned" is true, the "from" and "to" addresses are assumed
  //  to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_disjoint_int_copy_core(bool aligned) {

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_16_bytes,  L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from   = O0; // source array address
    const Register to     = O1; // destination array address
    const Register count  = O2; // elements count
    const Register offset = O5; // offset from start of arrays
    // O3, O4, G3, G4 are used as temp registers

    // 'aligned' == true when it is known statically during compilation
    // of this arraycopy call site that both 'from' and 'to' addresses
    // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
    //
    // Aligned arrays have 4-byte alignment in a 32-bit VM
    // and 8-byte alignment in a 64-bit VM.
    //
#ifdef _LP64
    if (!aligned)
#endif
    {
      // The next check could be put under 'ifndef' since the code in
      // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.

      // for short arrays, just do single element copy
      __ cmp(count, 5); // 4 + 1 (20 bytes)
      __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
      __ delayed()->mov(G0, offset);

      // copy 1 element to align 'to' on an 8 byte boundary
      __ andcc(to, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
      __ delayed()->ld(from, 0, O3);
      __ inc(from, 4);
      __ inc(to, 4);
      __ dec(count);
      __ st(O3, to, -4);
      __ BIND(L_skip_alignment);

      // if arrays have same alignment mod 8, do 4 elements copy
      __ andcc(from, 7, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
      __ delayed()->ld(from, 0, O3);

      //
      // Load 2 aligned 8-byte chunks and use one from the previous iteration
      // to form 2 aligned 8-byte chunks to store.
      //
      // copy_16_bytes_forward_with_shift() is not used here since this
      // code is more efficient.
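      //
      // Illustrative C view of the merge in the loop below (assumes
      // 'from' is 4-byte but not 8-byte aligned, so each 32-bit element
      // straddles two aligned 64-bit words; O3 carries the previous load):
      //
      //   store64[i]     = (O3 << 32) | (O4 >> 32);  // stx(O3, to, -16)
      //   store64[i + 1] = (O4 << 32) | (G4 >> 32);  // stx(O4, to,  -8)
      //   O3 = G4;                     // next iteration reuses the low half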

      // copy with shift 4 elements (16 bytes) at a time
      __ dec(count, 4);   // The cmp at the beginning guarantees count >= 4

      __ align(16);
      __ BIND(L_copy_16_bytes);
      __ ldx(from, 4, O4);
      __ deccc(count, 4); // Can we do next iteration after this one?
      __ ldx(from, 12, G4);
      __ inc(to, 16);
      __ inc(from, 16);
      __ sllx(O3, 32, O3);
      __ srlx(O4, 32, G3);
      __ bset(G3, O3);
      __ stx(O3, to, -16);
      __ sllx(O4, 32, O4);
      __ srlx(G4, 32, G3);
      __ bset(G3, O4);
      __ stx(O4, to, -8);
      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
      __ delayed()->mov(G4, O3);

      __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
      __ delayed()->inc(count, 4); // restore 'count'

      __ BIND(L_aligned_copy);
    }
    // copy 4 elements (16 bytes) at a time
    __ and3(count, 1, G4); // Save count
    __ srl(count, 1, count);
    generate_disjoint_long_copy_core(aligned);
    __ mov(G4, count);     // Restore count

    // copy 1 element at a time
    __ BIND(L_copy_4_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_4_bytes_loop);
    __ ld(from, offset, O3);
    __ deccc(count);
    __ st(O3, to, offset);
    __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
    __ delayed()->inc(offset, 4);
    __ BIND(L_exit);
  }

  //
  //  Generate stub for disjoint int copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_int_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register count = O2;
    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_int_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    generate_disjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate core code for conjoint int copy (and oop copy on 32-bit).
  //  If "aligned" is true, the "from" and "to" addresses are assumed
  //  to be heapword aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_int_copy_core(bool aligned) {
    // Do reverse copy.

    Label L_skip_alignment, L_aligned_copy;
    Label L_copy_16_bytes,  L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;

    const Register from     = O0;   // source array address
    const Register to       = O1;   // destination array address
    const Register count    = O2;   // elements count
    const Register end_from = from; // source array end address
    const Register end_to   = to;   // destination array end address
    // O3, O4, O5, G3 are used as temp registers

    const Register byte_count = O3; // bytes count to copy

    __ sllx(count, LogBytesPerInt, byte_count);
    __ add(to, byte_count, end_to); // offset after last copied element

    __ cmp(count, 5); // for short arrays, just do single element copy
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
    __ delayed()->add(from, byte_count, end_from);

    // copy 1 element to align 'end_to' on an 8-byte boundary
    __ andcc(end_to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
    __ delayed()->nop();
    __ dec(count);
    __ dec(end_from, 4);
    __ dec(end_to,   4);
    __ ld(end_from, 0, O4);
    __ st(O4, end_to, 0);
    __ BIND(L_skip_alignment);

    // Check if 'end_from' and 'end_to' have the same alignment.
    __ andcc(end_from, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
    __ delayed()->dec(count, 4); // The cmp at the start guarantees count >= 4

    // copy with shift 4 elements (16 bytes) at a time
    //
    // Load 2 aligned 8-byte chunks and use one from the previous iteration
    // to form 2 aligned 8-byte chunks to store.
    //
    __ ldx(end_from, -4, O3);
    __ align(16);
    __ BIND(L_copy_16_bytes);
    __ ldx(end_from, -12, O4);
    __ deccc(count, 4);
    __ ldx(end_from, -20, O5);
    __ dec(end_to,   16);
    __ dec(end_from, 16);
    __ srlx(O3, 32, O3);
    __ sllx(O4, 32, G3);
    __ bset(G3, O3);
    __ stx(O3, end_to, 8);
    __ srlx(O4, 32, O4);
    __ sllx(O5, 32, G3);
    __ bset(O4, G3);
    __ stx(G3, end_to, 0);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->mov(O5, O3);

    __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
    __ delayed()->inc(count, 4);

    // copy 4 elements (16 bytes) at a time
    __ align(16);
    __ BIND(L_aligned_copy);
    __ dec(end_from, 16);
    __ ldx(end_from, 8, O3);
    __ ldx(end_from, 0, O4);
    __ dec(end_to, 16);
    __ deccc(count, 4);
    __ stx(O3, end_to, 8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
    __ delayed()->stx(O4, end_to, 0);
    __ inc(count, 4);

    // copy 1 element (4 bytes) at a time
    __ BIND(L_copy_4_bytes);
    __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
    __ delayed()->nop();
    __ BIND(L_copy_4_bytes_loop);
    __ dec(end_from, 4);
    __ dec(end_to, 4);
    __ ld(end_from, 0, O4);
    __ deccc(count);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
    __ delayed()->st(O4, end_to, 0);
    __ BIND(L_exit);
  }

  //
  //  Generate stub for conjoint int copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_int_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    address nooverlap_target = aligned ?
        StubRoutines::arrayof_jint_disjoint_arraycopy() :
        disjoint_int_copy_entry;

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  int_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 2);

    generate_conjoint_int_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate core code for disjoint long copy (and oop copy on 64-bit).
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_disjoint_long_copy_core(bool aligned) {
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0; // source array address
    const Register to      = O1; // destination array address
    const Register count   = O2; // elements count
    const Register offset0 = O4; // element offset
    const Register offset8 = O5; // next element offset

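    // Shape of the code below, as illustrative C (two elements per
    // iteration, plus a tail for an odd element; registers as named above):
    //
    //   long i = 0;
    //   long c = count - 2;
    //   for (; c >= 0; c -= 2, i += 2) {          // 16 bytes per iteration
    //     to[i] = from[i];  to[i + 1] = from[i + 1];
    //   }
    //   if (c + 2 != 0)  to[i] = from[i];         // odd trailing element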
    __ deccc(count, 2);
    __ mov(G0, offset0);   // offset from start of arrays (0)
    __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
    __ delayed()->add(offset0, 8, offset8);
    __ align(16);
    __ BIND(L_copy_16_bytes);
    __ ldx(from, offset0, O3);
    __ ldx(from, offset8, G3);
    __ deccc(count, 2);
    __ stx(O3, to, offset0);
    __ inc(offset0, 16);
    __ stx(G3, to, offset8);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->inc(offset8, 16);

    __ BIND(L_copy_8_bytes);
    __ inccc(count, 2);
    __ brx(Assembler::zero, true, Assembler::pn, L_exit );
    __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
    __ ldx(from, offset0, O3);
    __ stx(O3, to, offset0);
    __ BIND(L_exit);
  }

  //
  //  Generate stub for disjoint long copy.
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_long_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_long_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    generate_disjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //
  //  Generate core code for conjoint long copy (and oop copy on 64-bit).
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  void generate_conjoint_long_copy_core(bool aligned) {
    // Do reverse copy.
    Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
    const Register from    = O0; // source array address
    const Register to      = O1; // destination array address
    const Register count   = O2; // elements count
    const Register offset8 = O4; // element offset
    const Register offset0 = O5; // previous element offset

    __ subcc(count, 1, count);
    __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
    __ delayed()->sllx(count, LogBytesPerLong, offset8);
    __ sub(offset8, 8, offset0);
    __ align(16);
    __ BIND(L_copy_16_bytes);
    __ ldx(from, offset8, O2);
    __ ldx(from, offset0, O3);
    __ stx(O2, to, offset8);
    __ deccc(offset8, 16);   // use offset8 as counter
    __ stx(O3, to, offset0);
    __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
    __ delayed()->dec(offset0, 16);

    __ BIND(L_copy_8_bytes);
    __ brx(Assembler::negative, false, Assembler::pn, L_exit );
    __ delayed()->nop();
    __ ldx(from, 0, O3);
    __ stx(O3, to, 0);
    __ BIND(L_exit);
  }

  //  Generate stub for conjoint long copy.
  //  "aligned" is ignored, because we must make the stronger
  //  assumption that both addresses are always 64-bit aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_long_copy(bool aligned, const char * name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert(!aligned, "usage");
    address nooverlap_target = disjoint_long_copy_entry;

    assert_clean_int(O2, O3);     // Make sure 'count' is clean int.

    if (!aligned)  long_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
    if (!aligned)  BLOCK_COMMENT("Entry:");

    array_overlap_test(nooverlap_target, 3);

    generate_conjoint_long_copy_core(aligned);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //  Generate stub for disjoint oop copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_disjoint_oop_copy(bool aligned, const char * name) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  disjoint_oop_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);
    gen_write_ref_array_pre_barrier(G1, G5);
#ifdef _LP64
    generate_disjoint_long_copy_core(aligned);
#else
    generate_disjoint_int_copy_core(aligned);
#endif
    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }

  //  Generate stub for conjoint oop copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //
  address generate_conjoint_oop_copy(bool aligned, const char * name) {

    const Register from  = O0;  // source array address
    const Register to    = O1;  // destination array address
    const Register count = O2;  // elements count

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    if (!aligned)  oop_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here
    if (!aligned)  BLOCK_COMMENT("Entry:");

    // save arguments for barrier generation
    __ mov(to, G1);
    __ mov(count, G5);

    gen_write_ref_array_pre_barrier(G1, G5);

    address nooverlap_target = aligned ?
        StubRoutines::arrayof_oop_disjoint_arraycopy() :
        disjoint_oop_copy_entry;

    array_overlap_test(nooverlap_target, LogBytesPerWord);

#ifdef _LP64
    generate_conjoint_long_copy_core(aligned);
#else
    generate_conjoint_int_copy_core(aligned);
#endif

    // O0 is used as temp register
    gen_write_ref_array_post_barrier(G1, G5, O0);

    // O3, O4 are used as temp registers
    inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }


  // Helper for generating a dynamic type check.
  // Smashes only the given temp registers.
  void generate_type_check(Register sub_klass,
                           Register super_check_offset,
                           Register super_klass,
                           Register temp,
                           Label& L_success,
                           Register deccc_hack = noreg) {
    assert_different_registers(sub_klass, super_check_offset, super_klass, temp);

    BLOCK_COMMENT("type_check:");

    Label L_miss;

    assert_clean_int(super_check_offset, temp);

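    // The whole check, as illustrative C (field names simplified; the
    // slow path is StubRoutines::Sparc::_partial_subtype_check, whose
    // result is reported through the condition codes):
    //
    //   if (sub_klass == super_klass) goto success;          // trivial match
    //   if (*(Klass**)((char*)sub_klass + super_check_offset) == super_klass)
    //     goto success;                                      // display hit
    //   if (super_check_offset != secondary_super_cache_offset)
    //     goto miss;                                // primary super, no match
    //   if (partial_subtype_check(sub_klass, super_klass) == 0)
    //     goto success;                                      // slow path
    //   // fall through: miss
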
    // maybe decrement caller's trip count:
#define DELAY_SLOT delayed();   \
    { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }

    // if the pointers are equal, we are done (e.g., String[] elements)
    __ cmp(sub_klass, super_klass);
    __ brx(Assembler::equal, true, Assembler::pt, L_success);
    __ DELAY_SLOT;

    // check the supertype display:
    __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
    __ cmp(super_klass, temp);                      // test the super type
    __ brx(Assembler::equal, true, Assembler::pt, L_success);
    __ DELAY_SLOT;

    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                     Klass::secondary_super_cache_offset_in_bytes());
    __ cmp(super_check_offset, sc_offset);
    __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
    __ delayed()->nop();

    __ save_frame(0);
    __ mov(sub_klass->after_save(), O1);
    // mov(super_klass->after_save(), O2); //fill delay slot
    assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
    __ call(StubRoutines::Sparc::_partial_subtype_check);
    __ delayed()->mov(super_klass->after_save(), O2);
    __ restore();

    // Upon return, the condition codes are already set.
    __ brx(Assembler::equal, true, Assembler::pt, L_success);
    __ DELAY_SLOT;

#undef DELAY_SLOT

    // Fall through on failure!
    __ BIND(L_miss);
  }


  //  Generate stub for checked oop copy.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 treated as signed
  //      ckoff: O3 (super_check_offset)
  //      ckval: O4 (super_klass)
  //      ret:   O0 zero for success; (-1^K) where K is partial transfer count
  //
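  // How a caller can decode that return value, as illustrative C
  // (not VM code; 'checkcast_copy' stands for the generated stub):
  //
  //   intptr_t r = checkcast_copy(from, to, count, ckoff, ckval);
  //   if (r == 0) {
  //     // all 'count' elements were copied
  //   } else {
  //     size_t copied = ~(size_t)r;  // elements copied before the type miss
  //   }
  //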
  address generate_checkcast_copy(const char* name) {

    const Register O0_from   = O0; // source array address
    const Register O1_to     = O1; // destination array address
    const Register O2_count  = O2; // elements count
    const Register O3_ckoff  = O3; // super_check_offset
    const Register O4_ckval  = O4; // super_klass

    const Register O5_offset = O5; // loop var, with stride wordSize
    const Register G1_remain = G1; // loop var, with stride -1
    const Register G3_oop    = G3; // actual oop copied
    const Register G4_klass  = G4; // oop._klass
    const Register G5_super  = G5; // oop._klass._primary_supers[ckval]

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    int klass_off = oopDesc::klass_offset_in_bytes();

    gen_write_ref_array_pre_barrier(G1, G5);


#ifdef ASSERT
    // We sometimes save a frame (see partial_subtype_check below).
    // If this will cause trouble, let's fail now instead of later.
    __ save_frame(0);
    __ restore();
#endif

#ifdef ASSERT
    // caller guarantees that the arrays really are different
    // otherwise, we would have to make conjoint checks
    { Label L;
      __ mov(O3, G1);           // spill: overlap test smashes O3
      __ mov(O4, G4);           // spill: overlap test smashes O4
      array_overlap_test(L, LogBytesPerWord);
      __ stop("checkcast_copy within a single array");
      __ bind(L);
      __ mov(G1, O3);
      __ mov(G4, O4);
    }
#endif //ASSERT

    assert_clean_int(O2_count, G1);     // Make sure 'count' is clean int.

    checkcast_copy_entry = __ pc();
    // caller can pass a 64-bit byte count here (from generic stub)
    BLOCK_COMMENT("Entry:");

    Label load_element, store_element, do_card_marks, fail, done;
    __ addcc(O2_count, 0, G1_remain);   // initialize loop index, and test it
    __ brx(Assembler::notZero, false, Assembler::pt, load_element);
    __ delayed()->mov(G0, O5_offset);   // offset from start of arrays

    // Empty array:  Nothing to do.
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->set(0, O0);           // return 0 on (trivial) success

    // ======== begin loop ========
    // (Loop is rotated; its entry is load_element.)
    // Loop variables:
    //   (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
    //   (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
    //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
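    //
    // The rotated loop, as illustrative C (barriers and the register
    // shuffling omitted; 'subtype_check' stands for generate_type_check):
    //
    //   for (; remain != 0; remain--, offset += wordSize) {
    //     oop o = *(oop*)(from + offset);
    //     if (o != NULL && !subtype_check(o->klass(), ckoff, ckval)) goto fail;
    //     *(oop*)(to + offset) = o;                // store_element
    //   }
    //   // success: card-mark the copied range and return 0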
    __ align(16);

    __ bind(store_element);
    // deccc(G1_remain);                 // decrement the count (hoisted)
    __ st_ptr(G3_oop, O1_to, O5_offset); // store the oop
    __ inc(O5_offset, wordSize);         // step to next offset
    __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
    __ delayed()->set(0, O0);            // return 0 on success

    // ======== loop entry is here ========
    __ bind(load_element);
    __ ld_ptr(O0_from, O5_offset, G3_oop);  // load the oop
    __ br_null(G3_oop, true, Assembler::pt, store_element);
    __ delayed()->deccc(G1_remain);      // decrement the count

    __ ld_ptr(G3_oop, klass_off, G4_klass); // query the object klass

    generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                        // branch to this on success:
                        store_element,
                        // decrement this on success:
                        G1_remain);
    // ======== end loop ========

    // It was a real error; we must depend on the caller to finish the job.
    // Register G1 has number of *remaining* oops, O2 number of *total* oops.
    // Emit GC store barriers for the oops we have copied (O2 minus G1),
    // and report their number to the caller.
    __ bind(fail);
    __ subcc(O2_count, G1_remain, O2_count);
    __ brx(Assembler::zero, false, Assembler::pt, done);
    __ delayed()->not1(O2_count, O0);    // report (-1^K) to caller

    __ bind(do_card_marks);
    gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]

    __ bind(done);
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
    __ retl();
    __ delayed()->nop();                 // return value in O0

    return start;
  }


  // Generate 'unsafe' array copy stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t argument instead of an element count.
  //
  // Arguments for generated stub:
  //      from:  O0
  //      to:    O1
  //      count: O2 byte count, treated as ssize_t, can be zero
  //
  // Examines the alignment of the operands and dispatches
  // to a long, int, short, or byte copy loop.
  //
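  // The dispatch rule, as illustrative C: the widest element size that
  // divides 'from', 'to' and 'count' wins, and the byte count is scaled
  // down to an element count on the way out.
  //
  //   if (((from | to | count) & (BytesPerLong - 1)) == 0)
  //     long_copy(from, to, count >> LogBytesPerLong);
  //   else if (((from | to | count) & (BytesPerInt - 1)) == 0)
  //     int_copy(from, to, count >> LogBytesPerInt);
  //   else if (((from | to | count) & (BytesPerShort - 1)) == 0)
  //     short_copy(from, to, count >> LogBytesPerShort);
  //   else
  //     byte_copy(from, to, count);
  //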
  address generate_unsafe_copy(const char* name) {

    const Register O0_from  = O0; // source array address
    const Register O1_to    = O1; // destination array address
    const Register O2_count = O2; // byte count (scaled to an element count on dispatch)

    const Register G1_bits  = G1; // test copy of low bits

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);

    __ or3(O0_from, O1_to, G1_bits);
    __ or3(O2_count, G1_bits, G1_bits);

    __ btst(BytesPerLong-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          long_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);

    __ btst(BytesPerInt-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          int_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);

    __ btst(BytesPerShort-1, G1_bits);
    __ br(Assembler::zero, true, Assembler::pt,
          short_copy_entry, relocInfo::runtime_call_type);
    // scale the count on the way out:
    __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);

    __ br(Assembler::always, false, Assembler::pt,
          byte_copy_entry, relocInfo::runtime_call_type);
    __ delayed()->nop();

    return start;
  }


  // Perform range checks on the proposed arraycopy.
  // Kills the two temps, but nothing else.
  // Also, clean the sign bits of src_pos and dst_pos.
  void arraycopy_range_checks(Register src,     // source array oop (O0)
                              Register src_pos, // source position (O1)
                              Register dst,     // destination array oop (O2)
                              Register dst_pos, // destination position (O3)
                              Register length,  // length of copy (O4)
                              Register temp1, Register temp2,
                              Label& L_failed) {
    BLOCK_COMMENT("arraycopy_range_checks:");

    //  if (src_pos + length > arrayOop(src)->length() ) FAIL;

    const Register array_length = temp1;  // scratch
    const Register end_pos      = temp2;  // scratch

    // Note:  This next instruction may be in the delay slot of a branch:
    __ add(length, src_pos, end_pos);  // src_pos + length
    __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    //  if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
    __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
    __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
    __ cmp(end_pos, array_length);
    __ br(Assembler::greater, false, Assembler::pn, L_failed);

    // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
    // Move with sign extension can be used since they are positive.
    __ delayed()->signx(src_pos, src_pos);
    __ signx(dst_pos, dst_pos);

    BLOCK_COMMENT("arraycopy_range_checks done");
  }


  //
  //  Generate generic array copy stubs
  //
  //  Input:
  //    O0    -  src oop
  //    O1    -  src_pos
  //    O2    -  dst oop
  //    O3    -  dst_pos
  //    O4    -  element count
  //
  //  Output:
  //    O0 ==  0  -  success
  //    O0 == -1  -  need to call System.arraycopy
  //
  address generate_generic_copy(const char *name) {

    Label L_failed, L_objArray;

    // Input registers
    const Register src      = O0;  // source array oop
    const Register src_pos  = O1;  // source position
    const Register dst      = O2;  // destination array oop
    const Register dst_pos  = O3;  // destination position
    const Register length   = O4;  // elements count

    // registers used as temp
    const Register G3_src_klass = G3; // source array klass
    const Register G4_dst_klass = G4; // destination array klass
    const Register G5_lh        = G5; // layout helper
    const Register O5_temp      = O5;

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // bump this on entry, not on exit:
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);

    // In principle, the int arguments could be dirty.
    //assert_clean_int(src_pos, G1);
    //assert_clean_int(dst_pos, G1);
    //assert_clean_int(length, G1);

    //-----------------------------------------------------------------------
    // Assembler stubs will be used for this call to arraycopy
    // if the following conditions are met (see the C sketch below):
    //
    // (1) src and dst must not be null.
    // (2) src_pos must not be negative.
    // (3) dst_pos must not be negative.
    // (4) length  must not be negative.
    // (5) src klass and dst klass should be the same and not NULL.
    // (6) src and dst should be arrays.
    // (7) src_pos + length must not exceed length of src.
    // (8) dst_pos + length must not exceed length of dst.
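    //
    // The same guards, as approximate C (klass and length loads written
    // as field accesses; the objArray path below relaxes (5) by falling
    // back to a checkcast copy, and the real checks are interleaved with
    // branch delay slots):
    //
    //   if (src == NULL || src_pos < 0 || dst == NULL || dst_pos < 0 ||
    //       length < 0)  return -1;
    //   if (src->klass() == NULL || src->klass() != dst->klass()) return -1;
    //   if (!src->klass()->is_array()) return -1;
    //   if ((uint)src_pos + (uint)length > (uint)src->length() ||
    //       (uint)dst_pos + (uint)length > (uint)dst->length()) return -1;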
    BLOCK_COMMENT("arraycopy initial argument checks");

    //  if (src == NULL) return -1;
    __ br_null(src, false, Assembler::pn, L_failed);

    //  if (src_pos < 0) return -1;
    __ delayed()->tst(src_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);
    __ delayed()->nop();

    //  if (dst == NULL) return -1;
    __ br_null(dst, false, Assembler::pn, L_failed);

    //  if (dst_pos < 0) return -1;
    __ delayed()->tst(dst_pos);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    //  if (length < 0) return -1;
    __ delayed()->tst(length);
    __ br(Assembler::negative, false, Assembler::pn, L_failed);

    BLOCK_COMMENT("arraycopy argument klass checks");
    //  get src->klass()
    __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);

#ifdef ASSERT
    //  assert(src->klass() != NULL);
    BLOCK_COMMENT("assert klasses not null");
    { Label L_a, L_b;
      __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
      __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
      __ bind(L_a);
      __ stop("broken null klass");
      __ bind(L_b);
      __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
      __ delayed()->mov(G0, G4_dst_klass);                 // scribble the temp
      BLOCK_COMMENT("assert done");
    }
#endif

    // Load layout helper
    //
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
    // 32        30    24            16              8     2                 0
    //
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
    //
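    // Decoding the fields used below, as illustrative C (shift/mask
    // names per Klass; 'lh' is the 32-bit layout helper value):
    //
    //   int tag        = ((unsigned)lh) >> Klass::_lh_array_tag_shift;
    //   int hdr_size   = (lh >> Klass::_lh_header_size_shift)
    //                    & Klass::_lh_header_size_mask;
    //   int log2_esize = lh & Klass::_lh_log2_element_size_mask;
    //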

    int lh_offset = klassOopDesc::header_size() * HeapWordSize +
                    Klass::layout_helper_offset_in_bytes();

    // Load 32-bit signed value. Use br() instruction with it to check icc.
    __ lduw(G3_src_klass, lh_offset, G5_lh);

    // Handle objArrays completely differently...
    juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
    __ set(objArray_lh, O5_temp);
    __ cmp(G5_lh,       O5_temp);
    __ br(Assembler::equal, false, Assembler::pt, L_objArray);
    __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);

    //  if (src->klass() != dst->klass()) return -1;
    __ cmp(G3_src_klass, G4_dst_klass);
    __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
    __ delayed()->nop();

    //  if (!src->is_Array()) return -1;
    __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
    __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);

    // At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
    __ delayed()->nop();
    { Label L;
      jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
      __ set(lh_prim_tag_in_place, O5_temp);
      __ cmp(G5_lh,                O5_temp);
      __ br(Assembler::greaterEqual, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("must be a primitive array");
      __ bind(L);
    }
#else
    __ delayed();                               // match next insn to prev branch
#endif

    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G4_dst_klass, L_failed);

    // typeArrayKlass
    //
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
    //

    const Register G4_offset = G4_dst_klass; // array offset
    const Register G3_elsize = G3_src_klass; // log2 element size

    __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
    __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset);  // array_offset
    __ add(src, G4_offset, src);       // src array offset
    __ add(dst, G4_offset, dst);       // dst array offset
    __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size

    // next registers should be set before the jump to corresponding stub
    const Register from  = O0; // source array address
    const Register to    = O1; // destination array address
    const Register count = O2; // elements count

    // 'from', 'to', 'count' registers should be set in this order
    // since they are the same as 'src', 'src_pos', 'dst'.

    BLOCK_COMMENT("scale indexes to element size");
    __ sll_ptr(src_pos, G3_elsize, src_pos);
    __ sll_ptr(dst_pos, G3_elsize, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr

    BLOCK_COMMENT("choose copy loop based on element size");
    __ cmp(G3_elsize, 0);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jbyte_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerShort);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jshort_arraycopy);
    __ delayed()->signx(length, count); // length

    __ cmp(G3_elsize, LogBytesPerInt);
    __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jint_arraycopy);
    __ delayed()->signx(length, count); // length
#ifdef ASSERT
    { Label L;
      __ cmp(G3_elsize, LogBytesPerLong);
      __ br(Assembler::equal, false, Assembler::pt, L);
      __ delayed()->nop();
      __ stop("must be long copy, but elsize is wrong");
      __ bind(L);
    }
#endif
    __ br(Assembler::always, false, Assembler::pt, StubRoutines::_jlong_arraycopy);
    __ delayed()->signx(length, count); // length

    // objArrayKlass
    __ BIND(L_objArray);
    // live at this point:  G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length

    Label L_plain_copy, L_checkcast_copy;
    //  test array classes for subtyping
    __ cmp(G3_src_klass, G4_dst_klass);         // usual case is exact equality
    __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
    __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below

    // Identically typed arrays can be copied without element-wise checks.
    arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                           O5_temp, G5_lh, L_failed);

    __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
    __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
    __ sll_ptr(src_pos, LogBytesPerOop, src_pos);
    __ sll_ptr(dst_pos, LogBytesPerOop, dst_pos);
    __ add(src, src_pos, from);       // src_addr
    __ add(dst, dst_pos, to);         // dst_addr
    __ BIND(L_plain_copy);
    __ br(Assembler::always, false, Assembler::pt, StubRoutines::_oop_arraycopy);
    __ delayed()->signx(length, count); // length

    __ BIND(L_checkcast_copy);
    // live at this point:  G3_src_klass, G4_dst_klass
    {
      // Before looking at dst.length, make sure dst is also an objArray.
      // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
      __ cmp(G5_lh,                    O5_temp);
      __ br(Assembler::notEqual, false, Assembler::pn, L_failed);

      // It is safe to examine both src.length and dst.length.
      __ delayed();                             // match next insn to prev branch
      arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
                             O5_temp, G5_lh, L_failed);

      // Marshal the base address arguments now, freeing registers.
      __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); //src offset
      __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); //dst offset
      __ sll_ptr(src_pos, LogBytesPerOop, src_pos);
      __ sll_ptr(dst_pos, LogBytesPerOop, dst_pos);
      __ add(src, src_pos, from);     // src_addr
      __ add(dst, dst_pos, to);       // dst_addr
      __ signx(length, count);        // length (reloaded)

      Register sco_temp = O3;         // this register is free now
      assert_different_registers(from, to, count, sco_temp,
                                 G4_dst_klass, G3_src_klass);

      // Generate the type check.
      int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                        Klass::super_check_offset_offset_in_bytes());
      __ lduw(G4_dst_klass, sco_offset, sco_temp);
      generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
                          O5_temp, L_plain_copy);

      // Fetch destination element klass from the objArrayKlass header.
      int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
                       objArrayKlass::element_klass_offset_in_bytes());

      // the checkcast_copy loop needs two extra arguments:
      __ ld_ptr(G4_dst_klass, ek_offset, O4);   // dest elem klass
      // lduw(O4, sco_offset, O3);              // sco of elem klass

      __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
      __ delayed()->lduw(O4, sco_offset, O3);
    }

    __ BIND(L_failed);
    __ retl();
    __ delayed()->sub(G0, 1, O0); // return -1
    return start;
  }
|
|
2838
|
|
2839 void generate_arraycopy_stubs() {
|
|
2840
|
|
2841 // Note: the disjoint stubs must be generated first, some of
|
|
2842 // the conjoint stubs use them.
|
|
2843 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
|
|
2844 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
|
|
2845 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
|
|
2846 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
|
|
2847 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
|
|
2848 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
|
|
2849 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
|
|
2850 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
|
|
2851 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
|
|
2852 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");
|
|
2853
|
|
2854 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
|
|
2855 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
|
|
2856 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy");
|
|
2857 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
|
|
2858 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, "oop_arraycopy");
|
|
2859 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
|
|
2860 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
|
|
2861 #ifdef _LP64
|
|
2862 // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
|
|
2863 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
|
|
2864 #else
|
|
2865 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
|
|
2866 #endif
    StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
    StubRoutines::_arrayof_oop_arraycopy   = StubRoutines::_oop_arraycopy;

    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
  }
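  // For reference, the dependency noted at the top of the routine above:
  // each conjoint stub begins with an overlap test and tail-calls its
  // disjoint sibling whenever a forward copy is safe. A minimal sketch of
  // that test in C (names are illustrative; the real stubs branch on the
  // same unsigned comparison in assembly):
  //
  //   // Forward copy is safe when dst is not inside [src, src + count):
  //   // the unsigned compare also covers the dst < src case via wraparound.
  //   if ((uintptr_t)(to - from) >= (uintptr_t)count * elem_size) {
  //     return disjoint_copy(from, to, count);
  //   }
  //   // otherwise copy backwards, from the last element down to the first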

  void generate_initial() {
    // Generates the initial stubs and initializes the entry points

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that exist in all platforms
    // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than
    // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp.
    StubRoutines::_forward_exception_entry                 = generate_forward_exception();

    StubRoutines::_call_stub_entry                         = generate_call_stub(StubRoutines::_call_stub_return_address);
    StubRoutines::_catch_exception_entry                   = generate_catch_exception();

    //------------------------------------------------------------------------------------------------------------------------
    // entry points that are platform specific
    StubRoutines::Sparc::_test_stop_entry                  = generate_test_stop();

    StubRoutines::Sparc::_stop_subroutine_entry            = generate_stop_subroutine();
    StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();

#if !defined(COMPILER2) && !defined(_LP64)
    StubRoutines::_atomic_xchg_entry         = generate_atomic_xchg();
    StubRoutines::_atomic_cmpxchg_entry      = generate_atomic_cmpxchg();
    StubRoutines::_atomic_add_entry          = generate_atomic_add();
    StubRoutines::_atomic_xchg_ptr_entry     = StubRoutines::_atomic_xchg_entry;
    StubRoutines::_atomic_cmpxchg_ptr_entry  = StubRoutines::_atomic_cmpxchg_entry;
    StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
    StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
    StubRoutines::_fence_entry               = generate_fence();
#endif // !COMPILER2 && !_LP64
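    // The _ptr aliases above are sound because this block is only compiled
    // for 32-bit builds (!_LP64), where intptr_t and jint are both 4 bytes:
    // an atomic operation on a jint is an atomic operation on a pointer.
    // The jlong cmpxchg still gets its own stub, since 8-byte values exceed
    // the native word size there.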

    StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
  }


  void generate_all() {
    // Generates all stubs and initializes the entry points

    // These entry points require SharedInfo::stack0 to be set up in non-core builds
    StubRoutines::_throw_AbstractMethodError_entry          = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),          false);
    StubRoutines::_throw_ArithmeticException_entry          = generate_throw_exception("ArithmeticException throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException),          true);
    StubRoutines::_throw_NullPointerException_entry         = generate_throw_exception("NullPointerException throw_exception",         CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException),         true);
    StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
    StubRoutines::_throw_StackOverflowError_entry           = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError),           false);
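    // Reading of the trailing bool, inferred from these call sites: it
    // selects whether the stub must restore the exception pc saved by the
    // signal handler - true for exceptions raised from implicit checks
    // (ArithmeticException, NullPointerException), false for those thrown
    // from explicit call sites. See generate_throw_exception for the
    // authoritative behavior.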

    StubRoutines::_handler_for_unsafe_access_entry =
      generate_handler_for_unsafe_access();

    // support for verify_oop (must happen after universe_init)
    StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();

    // arraycopy stubs used by compilers
    generate_arraycopy_stubs();
  }


 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    // replace the standard masm with a special one:
    _masm = new MacroAssembler(code);

    _stub_count = !all ? 0x100 : 0x200;
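    // Reading of the magic bases (an inference from stub_prolog below, not
    // documented here): the counter it emits numbers initial-phase stubs
    // from 0x101 and full-phase stubs from 0x201, so a stub's generation
    // phase can be recovered from the count embedded in its debug prolog.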
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }

    // make sure this stub is available for all local calls
    if (_atomic_add_stub.is_unbound()) {
      // generate a second time, if necessary
      (void) generate_atomic_add();
    }
  }


 private:
  int _stub_count;
  void stub_prolog(StubCodeDesc* cdesc) {
# ifdef ASSERT
    // put extra information in the stub code, to make it more readable
#ifdef _LP64
    // Write the high part of the address
    // [RGV] Check if there is a dependency on the size of this prolog
    __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
#endif
    __ emit_data((intptr_t)cdesc,       relocInfo::none);
    __ emit_data(++_stub_count,         relocInfo::none);
# endif
    align(true);
  }
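  // In ASSERT builds the data words emitted above give every stub a small
  // self-describing header. A sketch of the resulting layout on a 64-bit VM,
  // reading backwards from the aligned first instruction (emit_data emits
  // one 32-bit word per call):
  //
  //   // word 0: high 32 bits of the StubCodeDesc* (LP64 only)
  //   // word 1: low 32 bits of the StubCodeDesc*
  //   // word 2: running stub count (see _stub_count above)
  //   // ... zero padding from align(true) up to a 32-byte boundary ...
  //   // first instruction of the stub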

  void align(bool at_header = false) {
    // %%%%% move this constant somewhere else
    // UltraSPARC cache line size is 8 instructions:
    const unsigned int icache_line_size      = 32;
    const unsigned int icache_half_line_size = 16;

    if (at_header) {
      while ((intptr_t)(__ pc()) % icache_line_size != 0) {
        __ emit_data(0, relocInfo::none);
      }
    } else {
      while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
        __ nop();
      }
    }
  }
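  // The loops above pad one 4-byte word at a time; the amount they emit is
  // simply the distance to the next boundary. Equivalent closed-form
  // arithmetic (a sketch only, since emission must go through the
  // assembler):
  //
  //   uintptr_t pc   = (uintptr_t)__ pc();
  //   uintptr_t line = at_header ? icache_line_size : icache_half_line_size;
  //   size_t    pad  = (line - pc % line) % line;  // bytes of zeros/nops needed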

}; // end class declaration


address StubGenerator::disjoint_byte_copy_entry  = NULL;
address StubGenerator::disjoint_short_copy_entry = NULL;
address StubGenerator::disjoint_int_copy_entry   = NULL;
address StubGenerator::disjoint_long_copy_entry  = NULL;
address StubGenerator::disjoint_oop_copy_entry   = NULL;

address StubGenerator::byte_copy_entry  = NULL;
address StubGenerator::short_copy_entry = NULL;
address StubGenerator::int_copy_entry   = NULL;
address StubGenerator::long_copy_entry  = NULL;
address StubGenerator::oop_copy_entry   = NULL;

address StubGenerator::checkcast_copy_entry = NULL;

void StubGenerator_generate(CodeBuffer* code, bool all) {
  StubGenerator g(code, all);
}
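// A note on usage: the VM is expected to drive this entry point twice during
// startup, roughly as sketched below (the caller lives in the shared stub
// initialization code, not in this file, and the buffer names here are
// illustrative):
//
//   StubGenerator_generate(initial_code_buffer, false);  // generate_initial()
//   ...                                                  // universe_init, etc.
//   StubGenerator_generate(full_code_buffer,    true);   // generate_all()
//
// which matches the "must happen after universe_init" constraint noted for
// the verify_oop subroutine in generate_all().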